def search(self, query, offset):
    """
    Search function for YouTube.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """
    pos = self._origin() + offset
    url = 'http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}'.format(
        urllib2.quote(query.search_terms), pos)

    try:
        data = urllib2.urlopen(url)
        try:
            return Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("YouTube", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for Twitter.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is added to self._origin() and sent as the page number

    Returns:

    * puppy.model.Response

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """
    pos = self._origin() + offset
    url = 'http://search.twitter.com/search.atom?q={0}&lang={1}&page={2}&result_type={3}&rpp={4}&include_entities={5}'.format(
        urllib2.quote(query.search_terms), self.language, pos, self.type,
        self.resultsPerPage, self.includeEntities)

    if self.geocode:
        # geocode is a regular query parameter, so it needs '=' (the previous ':' produced a malformed parameter)
        url += '&geocode={0}'.format(self.geocode)

    try:
        data = urllib2.urlopen(url)
        try:
            return Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("Twitter", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for retrieving results from the PuppyIR Pathfinder service which searches the
    information centre at the Emma Children's Hospital.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): result offset for the search; it is added to self._origin() and sent as the page number

    Returns:

    * results puppy.model.Response, post-processed by self.addEmmaAge and self.addEmmaItemId

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """
    pos = self._origin() + offset
    # Named resultFormat so the builtin format() is not shadowed
    resultFormat = 'rss'
    url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
        urllib2.quote(query.search_terms), pos, resultFormat, self.age, self.resultsPerPage)

    try:
        data = urllib2.urlopen(url)
        try:
            emmaResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        emmaResponse = self.addEmmaAge(emmaResponse)
        emmaResponse = self.addEmmaItemId(emmaResponse)
        return emmaResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("PuppyIR Pathfinder Search", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for YouTube.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """
    pos = self._origin() + offset
    url = "http://gdata.youtube.com/feeds/api/videos?vq={0}&racy=exclude&orderby=viewCount&start-index={1}".format(
        urllib2.quote(query.search_terms), pos
    )

    try:
        data = urllib2.urlopen(url)
        try:
            return Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("YouTube", e, errorType="urllib2", url=url)
def search(self, query, offset):
    """
    Search function for Microsoft Bing.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): page offset for the search; when greater than zero, offset * self.resultsPerPage is added to self._origin()

    Returns:

    * results (puppy.model.Response); for the 'image' source each entry also gets thumbnail, thumbnailWidth and thumbnailHeight

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addDefaultThumbnails(bingResponse):
        """This goes through the results and adds a easy access default thumbnail."""
        for result in bingResponse.entries:
            # The first media thumbnail is used as the default one
            defaultThumbnail = result['media_thumbnail'][0]
            result['thumbnail'] = defaultThumbnail['url']
            result['thumbnailWidth'] = defaultThumbnail['width']
            result['thumbnailHeight'] = defaultThumbnail['height']
        return bingResponse

    formattedQuery = urllib2.quote(self._modify_query(query.search_terms))
    pos = self._origin()

    if offset > 0:
        pos = pos + (offset * self.resultsPerPage)

    url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(
        formattedQuery, self.source, self.resultsPerPage, pos, self.adult, self.market)

    # If the source type is web or phonebook we can add lon/lat/radius for local search
    if self.source in ('web', 'phonebook'):
        if self.lat and self.lon and self.radius:
            url += "&Latitude={0}&Longitude={1}&Radius={2}".format(self.lat, self.lon, self.radius)

    try:
        data = urllib2.urlopen(url)
        try:
            bingResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        if self.source == 'image':
            bingResponse = addDefaultThumbnails(bingResponse)
        return bingResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("Bing", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for YouTube.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): page offset for the search; offset * self.resultsPerPage is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response whose entries also carry summary, embedUrl and thumbnail fields

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addExtraFields(youtubeResponse):
        """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
        for result in youtubeResponse.entries:
            author = result['author']
            fullDescription = result['media_group']  # This is author+description+'youtube'
            # Remove author from start and 'youtube' from end - Perhaps find more elegant method
            result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
            result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

            thumbnails = result['media_thumbnail']
            if len(thumbnails) >= 2:
                # If we have 2 or more thumbnails use the second (hq thumbnail)
                result['thumbnail'] = thumbnails[1]['url']
            elif len(thumbnails) == 1:
                # Otherwise use the first (it's pretty low res compared to above)
                result['thumbnail'] = thumbnails[0]['url']
            else:
                # If that fails just leave it blank
                result['thumbnail'] = ''
        return youtubeResponse

    pos = self._origin() + (offset * self.resultsPerPage)
    url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
        urllib2.quote(query.search_terms), self.resultsPerPage, self.safeSearch, pos, self.orderBy)

    if self.format:
        url += "&format={0}".format(self.format)

    if self.location and self.locationRadius:
        url += "&location-radius={0}&location={1}".format(self.locationRadius, self.location)
        # The '!' must directly follow the location value, so it is only added in this branch
        if self.onlyLocation == True:
            url += '!'  # This forces YouTube to only return results with a location

    try:
        data = urllib2.urlopen(url)
        try:
            youtubeResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        # Does some processing to get embed url, summary and thumbnail shortcut
        youtubeResponse = addExtraFields(youtubeResponse)
        return youtubeResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("YouTube V2", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for Microsoft Bing.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): page offset for the search; when greater than zero, offset * self.resultsPerPage is added to self._origin()

    Returns:

    * results (puppy.model.Response); for the 'image' source each entry also gets thumbnail, thumbnailWidth and thumbnailHeight

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addDefaultThumbnails(bingResponse):
        """This goes through the results and adds a easy access default thumbnail."""
        for result in bingResponse.entries:
            # The first media thumbnail is used as the default one
            defaultThumbnail = result['media_thumbnail'][0]
            result['thumbnail'] = defaultThumbnail['url']
            result['thumbnailWidth'] = defaultThumbnail['width']
            result['thumbnailHeight'] = defaultThumbnail['height']
        return bingResponse

    formattedQuery = urllib2.quote(self._modify_query(query.search_terms))
    pos = self._origin()

    if offset > 0:
        pos = pos + (offset * self.resultsPerPage)

    url = 'http://api.search.live.net/rss.aspx?&query={0}&source={1}&{1}.count={2}&{1}.offset={3}&Adult={4}&Market={5}'.format(
        formattedQuery, self.source, self.resultsPerPage, pos, self.adult, self.market)

    # If the source type is web or phonebook we can add lon/lat/radius for local search
    if self.source in ('web', 'phonebook'):
        if self.lat and self.lon and self.radius:
            url += "&Latitude={0}&Longitude={1}&Radius={2}".format(self.lat, self.lon, self.radius)

    try:
        data = urllib2.urlopen(url)
        try:
            bingResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        if self.source == 'image':
            bingResponse = addDefaultThumbnails(bingResponse)
        return bingResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("Bing", e, errorType='urllib2', url=url)
def search(self, query, limit):
    """
    Search function for Hospital Data.

    Parameters:

    * query (puppy.model.Query)

    * limit (int): currently unused; the number of rows is capped by self.resultsPerPage instead

    Returns:

    * results puppy.model.Response, post-processed by self.parseEmmaAge and self.parseEmmaLinks

    Raises:

    * SearchEngineError: wraps any exception raised while connecting, querying or parsing
    """
    try:
        conn = _mssql.connect(server=self._server, user=self._user, password=self._password, database=self._database)
        try:
            # Table names and the row cap come from this object's configuration and cannot be bound
            # as parameters; the row cap is coerced to int so only a number can reach the TOP clause.
            # The user-supplied search terms are bound through the %s placeholder rather than being
            # concatenated into the statement, which closes the SQL injection hole.
            sql_query = (
                'SELECT temp.Id, temp.Titel, temp.Auteurs, temp.Leeftijdscategorie, temp.Annotatie, '
                'temp.ISBN, temp.Editie, temp.Uitgever, temp.[Classificatie code], temp.[Jaar van uitgave], '
                'temp.[Plaats van uitgave], temp.[Ref Type], temp.[Prijs], temp.[Serie], img.location '
                'FROM (SELECT TOP {0} * FROM {1} '
                'INNER JOIN FREETEXTTABLE({1},*,%s) as ft ON ft.[KEY]=Id ORDER BY ft.RANK DESC) as temp '
                'LEFT OUTER JOIN {2} as img ON temp.Id = img.item_id FOR xml raw, ELEMENTS;'
            ).format(int(self.resultsPerPage), self._database_bookstable, self._database_images)

            conn.execute_query(sql_query, (query.search_terms,))

            # The first column of every row is stripped and the pieces are joined into one string
            response = "".join(row[0].strip() for row in conn)
        finally:
            # Always release the database connection, including on query errors
            conn.close()

        response = self.cleanXmlEmma(response)

        # add rss tag for parsing
        response = "<rss>" + response + "</rss>"

        results = Response.parse_feed(response)
        results = self.parseEmmaAge(results)
        results = self.parseEmmaLinks(results)
        return results

    # Catch all exception, just in case
    except Exception as e:
        raise SearchEngineError("PuppyIR Emma Sql Server Search", e)
def search(self, query, pos=0):
    """Search function for OpenSearch compliant website.

    If a template exists, a search will be executed immediately using the search template.
    When no template exists this function performs no request and returns None.

    Parameters:

    * query (puppy.model.Query)

    * pos (int): when non-zero, substituted for the '{start}' placeholder of the template;
      when zero the placeholder is left untouched

    Returns:

    * results (puppy.model.Response): the output of Response.parse_feed,
      Response.parse_xml_suggestions or Response.parse_json_suggestions, chosen from
      self.results and self.xml

    * None when there is no template, when the request fails (a message is printed), or when
      self.results is set while self.xml is not
    """
    if not self.template:
        return None

    # template exists, use it to search
    search_url = self.template.replace('{searchTerms}', urllib2.quote(query.search_terms))
    if pos != 0:
        # quote() only accepts strings, so the numeric position is converted first
        search_url = search_url.replace('{start}', urllib2.quote(str(pos)))

    try:
        data = urllib2.urlopen(search_url)
        try:
            response = data.read()
        finally:
            # Release the connection even when reading fails
            data.close()
    except urllib2.URLError:
        # Best effort: report the failure and give the caller no results
        print("Opensearch for {0} failed".format(self.url))
        return None

    if self.results and self.xml:
        return Response.parse_feed(response)
    if not self.results and self.xml:
        return Response.parse_xml_suggestions(response)
    if not self.results and not self.xml:
        return Response.parse_json_suggestions(response)
    # self.results without self.xml has no parser
    return None
def search(self, query, offset):
    """
    Search function for Picassa.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response whose entries also carry a thumbnail field

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addThumbnailShortcut(picassaResponse):
        """This goes through the results and adds a thumbnail shortcut"""
        for result in picassaResponse.entries:
            # NOTE(review): assumes every entry has at least three thumbnails - confirm against the feed
            result['thumbnail'] = result['media_thumbnail'][2]['url']
        return picassaResponse

    pos = self._origin() + offset
    url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(
        urllib2.quote(query.search_terms), self.access, self.kind, pos, self.resultsPerPage)

    try:
        data = urllib2.urlopen(url)
        try:
            picassaResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        picassaResponse = addThumbnailShortcut(picassaResponse)
        return picassaResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)
def search(self, query, pos=0):
    """Search function for OpenSearch compliant website.

    If a template exists, a search will be executed immediately using the search template.
    When no template exists this function performs no request and returns None.

    Parameters:

    * query (puppy.model.Query)

    * pos (int): when non-zero, substituted for the "{start}" placeholder of the template;
      when zero the placeholder is left untouched

    Returns:

    * results (puppy.model.Response): the output of Response.parse_feed,
      Response.parse_xml_suggestions or Response.parse_json_suggestions, chosen from
      self.results and self.xml

    * None when there is no template, when the request fails (a message is printed), or when
      self.results is set while self.xml is not
    """
    if not self.template:
        return None

    # template exists, use it to search
    search_url = self.template.replace("{searchTerms}", urllib2.quote(query.search_terms))
    if pos != 0:
        # quote() only accepts strings, so the numeric position is converted first
        search_url = search_url.replace("{start}", urllib2.quote(str(pos)))

    try:
        data = urllib2.urlopen(search_url)
        try:
            response = data.read()
        finally:
            # Release the connection even when reading fails
            data.close()
    except urllib2.URLError:
        # Best effort: report the failure and give the caller no results
        print("Opensearch for {0} failed".format(self.url))
        return None

    if self.results and self.xml:
        return Response.parse_feed(response)
    if not self.results and self.xml:
        return Response.parse_xml_suggestions(response)
    if not self.results and not self.xml:
        return Response.parse_json_suggestions(response)
    # self.results without self.xml has no parser
    return None
def search(self, query, offset):
    """
    Search function for retrieving results from the PuppyIR Pathfinder service which searches the
    information centre at the Emma Children's Hospital.

    Parameters:

    * query (puppy.model.Query)

    * offset (int): result offset for the search; it is added to self._origin() and sent as the page number

    Returns:

    * results puppy.model.Response, post-processed by self.addEmmaAge and self.addEmmaItemId

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """
    pos = self._origin() + offset
    # Named resultFormat so the builtin format() is not shadowed
    resultFormat = 'rss'
    url = "http://pathfinder.cs.utwente.nl/cgi-bin/opensearch/ekz.cgi?query={0}&page={1}&format={2}&leeftijd={3}&size={4}".format(
        urllib2.quote(query.search_terms), pos, resultFormat, self.age, self.resultsPerPage)

    try:
        data = urllib2.urlopen(url)
        try:
            emmaResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        emmaResponse = self.addEmmaAge(emmaResponse)
        emmaResponse = self.addEmmaItemId(emmaResponse)
        return emmaResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("PuppyIR Pathfinder Search", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for Picassa.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): result offset for the search; it is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response whose entries also carry a thumbnail field

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addThumbnailShortcut(picassaResponse):
        """This goes through the results and adds a thumbnail shortcut"""
        for result in picassaResponse.entries:
            # NOTE(review): assumes every entry has at least three thumbnails - confirm against the feed
            result['thumbnail'] = result['media_thumbnail'][2]['url']
        return picassaResponse

    pos = self._origin() + offset
    url = 'https://picasaweb.google.com/data/feed/api/all?q={0}&access={1}&kind={2}&start-index={3}&max-results={4}'.format(
        urllib2.quote(query.search_terms), self.access, self.kind, pos, self.resultsPerPage)

    try:
        data = urllib2.urlopen(url)
        try:
            picassaResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        picassaResponse = addThumbnailShortcut(picassaResponse)
        return picassaResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("Picassa", e, errorType='urllib2', url=url)
def search(self, query, offset):
    """
    Search function for YouTube.

    Parameters:

    * query (puppy.model.OpenSearch.Query)

    * offset (int): page offset for the search; offset * self.resultsPerPage is added to self._origin() to form the start-index

    Returns:

    * puppy.model.OpenSearch.Response whose entries also carry summary, embedUrl and thumbnail fields

    Raises:

    * SearchEngineError: wraps the urllib2.URLError raised when the request fails
    """

    def addExtraFields(youtubeResponse):
        """This goes through the results and adds: the summary field, the embed url and adds a thumbnail shortcut."""
        for result in youtubeResponse.entries:
            author = result['author']
            fullDescription = result['media_group']  # This is author+description+'youtube'
            # Remove author from start and 'youtube' from end - Perhaps find more elegant method
            result['summary'] = fullDescription[len(author):len(fullDescription) - 7]
            result['embedUrl'] = 'http://www.youtube.com/embed/' + result['id'].split(':video:')[1]

            thumbnails = result['media_thumbnail']
            if len(thumbnails) >= 2:
                # If we have 2 or more thumbnails use the second (hq thumbnail)
                result['thumbnail'] = thumbnails[1]['url']
            elif len(thumbnails) == 1:
                # Otherwise use the first (it's pretty low res compared to above)
                result['thumbnail'] = thumbnails[0]['url']
            else:
                # If that fails just leave it blank
                result['thumbnail'] = ''
        return youtubeResponse

    pos = self._origin() + (offset * self.resultsPerPage)
    url = 'http://gdata.youtube.com/feeds/api/videos?q={0}&max-results={1}&safeSearch={2}&start-index={3}&orderby={4}&v=2'.format(
        urllib2.quote(query.search_terms), self.resultsPerPage, self.safeSearch, pos, self.orderBy)

    if self.format:
        url += "&format={0}".format(self.format)

    if self.location and self.locationRadius:
        url += "&location-radius={0}&location={1}".format(self.locationRadius, self.location)
        # The '!' must directly follow the location value, so it is only added in this branch
        if self.onlyLocation == True:
            url += '!'  # This forces YouTube to only return results with a location

    try:
        data = urllib2.urlopen(url)
        try:
            youtubeResponse = Response.parse_feed(data.read())
        finally:
            # Release the connection even when reading or parsing fails
            data.close()
        # Does some processing to get embed url, summary and thumbnail shortcut
        youtubeResponse = addExtraFields(youtubeResponse)
        return youtubeResponse
    except urllib2.URLError as e:
        # urllib2 - this catches http errors due to the service being down, lack of a proxy etc
        raise SearchEngineError("YouTube V2", e, errorType='urllib2', url=url)