Пример #1
0
    def search(self, query, offset):
        """
        Search function for YouTube.

        Builds the video-feed request URL for the query, downloads it and hands
        the body to ``Response.parse_feed``.

        Parameters:

        * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

        * offset (int): result offset for the search; added to ``self._origin()``
          to form the ``start-index`` request parameter

        Returns:

        * puppy.model.OpenSearch.Response: whatever ``Response.parse_feed`` builds
          from the downloaded feed

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

        """
        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        pos = self._origin() + offset
        url = 'http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}'.format(
            urllib2.quote(query.search_terms), pos)

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()
            return Response.parse_feed(feed)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube", e, errorType='urllib2', url=url)
Пример #2
0
  def search(self, query, offset):
    """
    Search function for Twitter.

    Builds the Atom search request URL for the query, downloads it and hands
    the body to ``Response.parse_feed``.

    Parameters:

    * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

    * offset (int): result offset for the search; added to ``self._origin()``
      to form the ``page`` request parameter

    Returns:

    * puppy.model.Response: whatever ``Response.parse_feed`` builds from the
      downloaded feed

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while contacting the service
    """
    # Build the URL before entering the try block so that it is always
    # available to the error handler below.
    pos = self._origin() + offset
    url = 'http://search.twitter.com/search.atom?q={0}&lang={1}&page={2}&result_type={3}&rpp={4}&include_entities={5}'.format(urllib2.quote(query.search_terms), self.language, pos, self.type, self.resultsPerPage, self.includeEntities)

    if self.geocode:
      # A query parameter needs '=' between name and value; the previous
      # '&geocode:' form was not a valid name=value pair.
      url += '&geocode={0}'.format(self.geocode)

    try:
      data = urllib2.urlopen(url)
      try:
        feed = data.read()
      finally:
        # Release the connection even if reading the body fails
        data.close()
      return Response.parse_feed(feed)

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("Twitter", e, errorType='urllib2', url=url)
Пример #3
0
  def search(self, query, offset):
    """
    Search function for retrieving results from the PuppyIR Pathfinder service which searches the information centre at the Emma Children's Hospital.

    The RSS feed returned by the service is parsed and then passed through
    ``self.addEmmaAge`` and ``self.addEmmaItemId``.

    Parameters:

    * query (puppy.model.Query): only ``query.search_terms`` is read here

    * offset (int): result offset for the search; added to ``self._origin()``
      to form the ``page`` request parameter

    Returns:

    * results puppy.model.Response

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

    """
    # Build the URL before entering the try block so that it is always
    # available to the error handler below.
    pos = self._origin() + offset
    feedFormat = 'rss'  # named feedFormat so the builtin format() is not shadowed
    url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(urllib2.quote(query.search_terms), pos, feedFormat, self.age, self.resultsPerPage)

    try:
      data = urllib2.urlopen(url)
      try:
        feed = data.read()
      finally:
        # Release the connection even if reading the body fails
        data.close()

      emmaResponse = Response.parse_feed(feed)
      emmaResponse = self.addEmmaAge(emmaResponse)
      emmaResponse = self.addEmmaItemId(emmaResponse)
      return emmaResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("PuppyIR Pathfinder Search", e, errorType='urllib2', url=url)
Пример #4
0
    def search(self, query, offset):
        """
        Search function for YouTube.

        Builds the video-feed request URL for the query, downloads it and hands
        the body to ``Response.parse_feed``.

        Parameters:

        * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

        * offset (int): result offset for the search; added to ``self._origin()``
          to form the ``start-index`` request parameter

        Returns:

        * puppy.model.OpenSearch.Response: whatever ``Response.parse_feed`` builds
          from the downloaded feed

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

        """
        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        pos = self._origin() + offset
        url = "http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}".format(
            urllib2.quote(query.search_terms), pos
        )

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()
            return Response.parse_feed(feed)

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube", e, errorType="urllib2", url=url)
Пример #5
0
    def search(self, query, offset):
        """
        Search function for Microsoft Bing.

        Parameters:

        * query (puppy.model.Query): ``query.search_terms`` is passed through
          ``self._modify_query`` and then URL-quoted

        * offset (int): result offset for the search; when positive it is
          multiplied by ``self.resultsPerPage`` and added to ``self._origin()``

        Returns:

        * results (puppy.model.Response): the parsed feed; when ``self.source`` is
          'image' every entry also gets 'thumbnail', 'thumbnailWidth' and
          'thumbnailHeight' keys

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service
        """
        def addDefaultThumbnails(bingResponse):
            """This goes through the results and adds a easy access default thumbnail."""
            for result in bingResponse.entries:
                firstThumbnail = result['media_thumbnail'][0]
                result['thumbnail'] = firstThumbnail['url']
                result['thumbnailWidth'] = firstThumbnail['width']
                result['thumbnailHeight'] = firstThumbnail['height']
            return bingResponse

        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        formattedQuery = urllib2.quote(self._modify_query(query.search_terms))
        pos = self._origin()

        if offset > 0:
            pos = pos + (offset * self.resultsPerPage)

        url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(
            formattedQuery, self.source, self.resultsPerPage, pos,
            self.adult, self.market)

        # If the source type is web or phonebook we can add lon/lat/radius for local search
        if self.source in ('web', 'phonebook'):
            if self.lat and self.lon and self.radius:
                url += "&Latitude={0}&Longitude={1}&Radius={2}".format(
                    self.lat, self.lon, self.radius)

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()

            bingResponse = Response.parse_feed(feed)

            if self.source == 'image':
                bingResponse = addDefaultThumbnails(bingResponse)

            return bingResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Bing", e, errorType='urllib2', url=url)
Пример #6
0
  def search(self, query, offset):
    """
    Search function for YouTube.

    Queries the v2 video feed and post-processes every entry to add the
    'summary', 'embedUrl' and 'thumbnail' keys.

    Parameters:

    * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

    * offset (int): result offset for the search; multiplied by
      ``self.resultsPerPage`` and added to ``self._origin()`` to form ``start-index``

    Returns:

    * puppy.model.OpenSearch.Response

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

    """
    def addExtraFields(youtubeResponse):
      """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
      for result in youtubeResponse.entries:
        author = result['author']
        fullDescription = result['media_group']  # This is author+description+'youtube'
        # Remove author from start and 'youtube' (7 characters) from end - Perhaps find more elegant method
        result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
        result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

        thumbnails = result['media_thumbnail']
        if len(thumbnails) >= 2:  # If we have 2 or more thumbnails use the second (hq thumbnail)
          result['thumbnail'] = thumbnails[1]['url']
        elif len(thumbnails) == 1:  # Otherwise use the first (it's pretty low res compared to above)
          result['thumbnail'] = thumbnails[0]['url']
        else:
          result['thumbnail'] = ''  # If that fails just leave it blank

      return youtubeResponse

    # Build the URL before entering the try block so that it is always
    # available to the error handler below.
    pos = self._origin() + (offset * self.resultsPerPage)
    url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(urllib2.quote(query.search_terms), self.resultsPerPage, self.safeSearch, pos, self.orderBy)

    if self.format:
      url += "&format={0}".format(self.format)

    if self.location and self.locationRadius:
      url += "&location-radius={0}&location={1}".format(self.locationRadius, self.location)
      if self.onlyLocation == True:
        url += '!'  # This forces YouTube to only return results with a location

    try:
      data = urllib2.urlopen(url)
      try:
        feed = data.read()
      finally:
        # Release the connection even if reading the body fails
        data.close()

      youtubeResponse = Response.parse_feed(feed)
      # Does some processing to get embed url, summary and thumbnail shortcut
      youtubeResponse = addExtraFields(youtubeResponse)
      return youtubeResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("YouTube V2", e, errorType='urllib2', url=url)
Пример #7
0
  def search(self, query, offset):
    """
    Search function for Microsoft Bing.

    Parameters:

    * query (puppy.model.Query): ``query.search_terms`` is passed through
      ``self._modify_query`` and then URL-quoted

    * offset (int): result offset for the search; when positive it is
      multiplied by ``self.resultsPerPage`` and added to ``self._origin()``

    Returns:

    * results (puppy.model.Response): the parsed feed; when ``self.source`` is
      'image' every entry also gets 'thumbnail', 'thumbnailWidth' and
      'thumbnailHeight' keys

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while contacting the service
    """

    def addDefaultThumbnails(bingResponse):
      """This goes through the results and adds a easy access default thumbnail."""
      for result in bingResponse.entries:
        firstThumbnail = result['media_thumbnail'][0]
        result['thumbnail'] = firstThumbnail['url']
        result['thumbnailWidth'] = firstThumbnail['width']
        result['thumbnailHeight'] = firstThumbnail['height']
      return bingResponse

    # Build the URL before entering the try block so that it is always
    # available to the error handler below.
    formattedQuery = urllib2.quote(self._modify_query(query.search_terms))
    pos = self._origin()

    if offset > 0:
      pos = pos + (offset * self.resultsPerPage)

    url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(formattedQuery, self.source, self.resultsPerPage, pos, self.adult, self.market)

    # If the source type is web or phonebook we can add lon/lat/radius for local search
    if self.source in ('web', 'phonebook'):
      if self.lat and self.lon and self.radius:
        url += "&Latitude={0}&Longitude={1}&Radius={2}".format(self.lat, self.lon, self.radius)

    try:
      data = urllib2.urlopen(url)
      try:
        feed = data.read()
      finally:
        # Release the connection even if reading the body fails
        data.close()

      bingResponse = Response.parse_feed(feed)

      if self.source == 'image':
        bingResponse = addDefaultThumbnails(bingResponse)

      return bingResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("Bing", e, errorType='urllib2', url=url)
Пример #8
0
  def search(self, query, limit):
    """
    Search function for Hospital Data.

    Runs a full-text query against the configured SQL Server books table,
    left-joins the image locations, and parses the XML returned by the server.

    Parameters:

    * query (puppy.model.Query): only ``query.search_terms`` is read here

    * limit (int): kept for interface compatibility but not used by this
      method; the row count is capped by ``self.resultsPerPage`` instead

    Returns:

    * results puppy.model.Response

    Raises:

    * SearchEngineError: wraps any exception raised while connecting, querying or parsing

    """
    conn = None
    try:
      conn = _mssql.connect(server=self._server, user=self._user, password=self._password, database=self._database)

      booksTable = str(self._database_bookstable)
      imagesTable = str(self._database_images)

      # Table names come from configuration and cannot be bound as parameters.
      # The user's search terms, however, are passed separately through the %s
      # placeholder so the driver quotes them instead of having them spliced
      # into the SQL text (which allowed SQL injection).
      sql_query = ('SELECT  temp.Id, temp.Titel, temp.Auteurs, temp.Leeftijdscategorie, temp.Annotatie, temp.ISBN, temp.Editie, temp.Uitgever, temp.[Classificatie code], temp.[Jaar van uitgave], temp.[Plaats van uitgave], temp.[Ref Type], temp.[Prijs], temp.[Serie], img.location  FROM (SELECT TOP '
                   + str(self.resultsPerPage) + ' * FROM ' + booksTable
                   + ' INNER JOIN FREETEXTTABLE(' + booksTable
                   + ',*,%s) as ft ON ft.[KEY]=Id ORDER BY ft.RANK DESC) as temp LEFT OUTER JOIN '
                   + imagesTable + ' as img ON temp.Id = img.item_id  FOR xml raw, ELEMENTS;')

      conn.execute_query(sql_query, (query.search_terms,))

      # Concatenate the first column of every returned row; presumably the
      # FOR XML output arrives split across several rows.
      response = "".join(row[0].strip() for row in conn)

      response = self.cleanXmlEmma(response)
      # add rss tag for parsing
      response = "<rss>" + response + "</rss>"

      results = Response.parse_feed(response)
      results = self.parseEmmaAge(results)
      results = self.parseEmmaLinks(results)
      return results

    # Catch all exception, just in case
    except Exception as e:
      raise SearchEngineError("PuppyIR Emma Sql Server Search", e)
    finally:
      # Always release the database connection, whether or not the query succeeded
      if conn is not None:
        conn.close()
Пример #9
0
    def search(self, query, pos=0):
        """Search function for OpenSearch compliant website.

        If a search template exists, the query terms (and, for a non-zero
        position, the start position) are substituted into it and the resulting
        URL is fetched.  When no template is set, this method does nothing and
        returns None.

        Parameters:

        * query (puppy.model.Query): only ``query.search_terms`` is read here
        * pos (int): start position; substituted for ``{start}`` only when non-zero

        Returns:

        * results (puppy.model.Response): built by ``Response.parse_feed``,
          ``Response.parse_xml_suggestions`` or ``Response.parse_json_suggestions``
          depending on ``self.results`` and ``self.xml``
        * None when there is no template, when the request fails, or when
          ``self.results`` is set but ``self.xml`` is not

        Raises:

        * Nothing for network failures: urllib2.URLError is caught and reported on stdout

        """
        if not self.template:
            return None

        # template exists, use it to search
        search_url = self.template.replace(
            '{searchTerms}', urllib2.quote(query.search_terms))
        if pos != 0:
            # quote() only works on strings, so convert a numeric position first
            search_url = search_url.replace('{start}', urllib2.quote(str(pos)))

        try:
            connection = urllib2.urlopen(search_url)
            try:
                response = connection.read()
            finally:
                # Release the connection even if reading the body fails
                connection.close()

            if self.xml:
                if self.results:
                    return Response.parse_feed(response)
                return Response.parse_xml_suggestions(response)
            if not self.results:
                return Response.parse_json_suggestions(response)
            # Results in a non-XML format: no parser is selected for this case
            return None
        except urllib2.URLError:
            print("Opensearch for {0} failed".format(self.url))
            return None
Пример #10
0
    def search(self, query, offset):
        """
        Search function for Picassa.

        Queries the Picasa Web Albums feed and adds a 'thumbnail' shortcut key
        to every entry.

        Parameters:

        * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

        * offset (int): result offset for the search; added to ``self._origin()``
          to form the ``start-index`` request parameter

        Returns:

        * puppy.model.OpenSearch.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

        """
        def addThumbnailShortcut(picassaResponse):
            """This goes through the results and adds a thumbnail shortcut"""
            for result in picassaResponse.entries:
                thumbnails = result['media_thumbnail']
                if len(thumbnails) > 2:
                    # Preferred: the third thumbnail
                    result['thumbnail'] = thumbnails[2]['url']
                elif thumbnails:
                    # Fewer than three available: fall back to the last one
                    result['thumbnail'] = thumbnails[-1]['url']
                else:
                    # No thumbnails at all: leave it blank rather than fail
                    result['thumbnail'] = ''
            return picassaResponse

        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        pos = self._origin() + offset
        url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(
            urllib2.quote(query.search_terms), self.access, self.kind, pos,
            self.resultsPerPage)

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()

            picassaResponse = Response.parse_feed(feed)
            picassaResponse = addThumbnailShortcut(picassaResponse)
            return picassaResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)
Пример #11
0
    def search(self, query, pos=0):
        """Search function for OpenSearch compliant website.

        If a search template exists, the query terms (and, for a non-zero
        position, the start position) are substituted into it and the resulting
        URL is fetched.  When no template is set, this method does nothing and
        returns None.

        Parameters:

        * query (puppy.model.Query): only ``query.search_terms`` is read here
        * pos (int): start position; substituted for ``{start}`` only when non-zero

        Returns:

        * results (puppy.model.Response): built by ``Response.parse_feed``,
          ``Response.parse_xml_suggestions`` or ``Response.parse_json_suggestions``
          depending on ``self.results`` and ``self.xml``
        * None when there is no template, when the request fails, or when
          ``self.results`` is set but ``self.xml`` is not

        Raises:

        * Nothing for network failures: urllib2.URLError is caught and reported on stdout

        """
        if not self.template:
            return None

        # template exists, use it to search
        search_url = self.template.replace("{searchTerms}", urllib2.quote(query.search_terms))
        if pos != 0:
            # quote() only works on strings, so convert a numeric position first
            search_url = search_url.replace("{start}", urllib2.quote(str(pos)))

        try:
            connection = urllib2.urlopen(search_url)
            try:
                response = connection.read()
            finally:
                # Release the connection even if reading the body fails
                connection.close()

            if self.xml:
                if self.results:
                    return Response.parse_feed(response)
                return Response.parse_xml_suggestions(response)
            if not self.results:
                return Response.parse_json_suggestions(response)
            # Results in a non-XML format: no parser is selected for this case
            return None
        except urllib2.URLError:
            print("Opensearch for {0} failed".format(self.url))
            return None
Пример #12
0
    def search(self, query, offset):
        """
        Search function for retrieving results from the PuppyIR Pathfinder service which searches the information centre at the Emma Children's Hospital.

        The RSS feed returned by the service is parsed and then passed through
        ``self.addEmmaAge`` and ``self.addEmmaItemId``.

        Parameters:

        * query (puppy.model.Query): only ``query.search_terms`` is read here

        * offset (int): result offset for the search; added to ``self._origin()``
          to form the ``page`` request parameter

        Returns:

        * results puppy.model.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

        """
        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        pos = self._origin() + offset
        feedFormat = 'rss'  # named feedFormat so the builtin format() is not shadowed
        url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
            urllib2.quote(query.search_terms), pos, feedFormat, self.age,
            self.resultsPerPage)

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()

            emmaResponse = Response.parse_feed(feed)
            emmaResponse = self.addEmmaAge(emmaResponse)
            emmaResponse = self.addEmmaItemId(emmaResponse)
            return emmaResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("PuppyIR Pathfinder Search",
                                    e,
                                    errorType='urllib2',
                                    url=url)
Пример #13
0
  def search(self, query, offset):
    """
    Search function for Picassa.

    Queries the Picasa Web Albums feed and adds a 'thumbnail' shortcut key
    to every entry.

    Parameters:

    * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

    * offset (int): result offset for the search; added to ``self._origin()``
      to form the ``start-index`` request parameter

    Returns:

    * puppy.model.OpenSearch.Response

    Raises:

    * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

    """
    def addThumbnailShortcut(picassaResponse):
      """This goes through the results and adds a thumbnail shortcut"""
      for result in picassaResponse.entries:
        thumbnails = result['media_thumbnail']
        if len(thumbnails) > 2:
          # Preferred: the third thumbnail
          result['thumbnail'] = thumbnails[2]['url']
        elif thumbnails:
          # Fewer than three available: fall back to the last one
          result['thumbnail'] = thumbnails[-1]['url']
        else:
          # No thumbnails at all: leave it blank rather than fail
          result['thumbnail'] = ''
      return picassaResponse

    # Build the URL before entering the try block so that it is always
    # available to the error handler below.
    pos = self._origin() + offset
    url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(urllib2.quote(query.search_terms), self.access, self.kind, pos, self.resultsPerPage)

    try:
      data = urllib2.urlopen(url)
      try:
        feed = data.read()
      finally:
        # Release the connection even if reading the body fails
        data.close()

      picassaResponse = Response.parse_feed(feed)
      picassaResponse = addThumbnailShortcut(picassaResponse)
      return picassaResponse

    # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
    except urllib2.URLError as e:
      raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)
Пример #14
0
    def search(self, query, offset):
        """
        Search function for YouTube.

        Queries the v2 video feed and post-processes every entry to add the
        'summary', 'embedUrl' and 'thumbnail' keys.

        Parameters:

        * query (puppy.model.OpenSearch.Query): only ``query.search_terms`` is read here

        * offset (int): result offset for the search; multiplied by
          ``self.resultsPerPage`` and added to ``self._origin()`` to form ``start-index``

        Returns:

        * puppy.model.OpenSearch.Response

        Raises:

        * SearchEngineError: wraps any urllib2.URLError raised while contacting the service

        """
        def addExtraFields(youtubeResponse):
            """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
            for result in youtubeResponse.entries:
                author = result['author']
                fullDescription = result['media_group']  # This is author+description+'youtube'
                # Remove author from start and 'youtube' (7 characters) from end - Perhaps find more elegant method
                result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
                result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

                thumbnails = result['media_thumbnail']
                if len(thumbnails) >= 2:  # If we have 2 or more thumbnails use the second (hq thumbnail)
                    result['thumbnail'] = thumbnails[1]['url']
                elif len(thumbnails) == 1:  # Otherwise use the first (it's pretty low res compared to above)
                    result['thumbnail'] = thumbnails[0]['url']
                else:
                    result['thumbnail'] = ''  # If that fails just leave it blank

            return youtubeResponse

        # Build the URL before entering the try block so that it is always
        # available to the error handler below.
        pos = self._origin() + (offset * self.resultsPerPage)
        url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
            urllib2.quote(query.search_terms), self.resultsPerPage,
            self.safeSearch, pos, self.orderBy)

        if self.format:
            url += "&format={0}".format(self.format)

        if self.location and self.locationRadius:
            url += "&location-radius={0}&location={1}".format(
                self.locationRadius, self.location)
            if self.onlyLocation == True:
                url += '!'  # This forces YouTube to only return results with a location

        try:
            data = urllib2.urlopen(url)
            try:
                feed = data.read()
            finally:
                # Release the connection even if reading the body fails
                data.close()

            youtubeResponse = Response.parse_feed(feed)
            # Does some processing to get embed url, summary and thumbnail shortcut
            youtubeResponse = addExtraFields(youtubeResponse)
            return youtubeResponse

        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        except urllib2.URLError as e:
            raise SearchEngineError("YouTube V2",
                                    e,
                                    errorType='urllib2',
                                    url=url)