class YoutubeMusic:
    """Looks up music videos for an artist through the YouTube service."""

    def __init__(self):
        self.service = YouTubeService()

    def search(self, artist):
        """
        Run a relevance-ordered query in the "/Music" category for an artist.

        @param artist: artist name, used verbatim as the search phrase
        @return: dict holding the queried 'artist' and a 'results' list with
                 the url, title and duration (in seconds) of every feed
                 entry accepted by is_valid_entry
        """
        video_query = YouTubeVideoQuery()
        video_query.vq = artist
        video_query.orderby = 'relevance'
        video_query.racy = 'exclude'
        video_query.format = '5'
        video_query.max_results = 50
        video_query.categories.append("/Music")

        feed = self.service.YouTubeQuery(video_query)

        accepted = [
            {
                'url': entry.media.player.url,
                'title': smart_str(entry.media.title.text),
                'duration': int(entry.media.duration.seconds),
            }
            for entry in feed.entry
            if self.is_valid_entry(artist, entry)
        ]
        return {'artist': artist, 'results': accepted}

    def is_valid_entry(self, artist, entry):
        """
        Decide whether a feed entry is an acceptable hit for the artist.

        An entry is rejected when any of the following holds:
          - it has a rating and the rating average is below 3.5
          - it has no statistics, or fewer than 1000 views
          - its duration is shorter than 2 minutes or longer than 9 minutes
          - the lower-cased artist name does not occur in the lower-cased
            title
          - the title contains one of the whole words "perform",
            "performance", "concert" or "cover"

        @param artist: artist name the entry's title must contain
        @param entry: feed entry to inspect
        @return: True if the entry passes every check, False otherwise
        """
        # Both conversions happen up front, before any of the checks below.
        seconds = int(entry.media.duration.seconds)
        lowered_title = smart_str(entry.media.title.text).lower()

        rated = entry.rating is not None
        if rated and float(entry.rating.average) < 3.5:
            return False

        if entry.statistics is None:
            return False
        if int(entry.statistics.view_count) < 1000:
            return False

        if not (2 * 60 <= seconds <= 9 * 60):
            return False

        if artist.lower() not in lowered_title:
            return False

        unwanted = re.search(
            r"\b(perform|performance|concert|cover)\b", lowered_title)
        return unwanted is None
class YouTube(WebDataSource):
    ''' searches youtube video library '''

    # Dotted attribute path -> flat key name. Not referenced by the methods
    # in this section; presumably consumed by convert_feed_entry (verify).
    YT_ATOM_RESULT_TO_DICT_MAPPING = {
        'media.title.text': 'title',
        'published.text': 'published',
        'media.description.text': 'content',
        'media.duration.seconds': 'duration',
        'statistics.view_count': 'statistics_viewcount',
        'statistics.favorite_count': 'statistics_favoritecount',
        'rating.average': 'rating_average',
        'rating.max': 'rating_max',
        'rating.min': 'rating_min',
        'rating.num_raters': 'rating_numraters',
        'summary': 'summary',
        'rights': 'rights',
        'updated.text': 'last_modified',
        'source': 'yt_source'
    }

    # Same idea for comment entries (consumer not visible in this section).
    YT_COMMENTS_MAPPING = {
        'id.text': 'id',
        'title.text': 'title',
        'published.text': 'published',
        'updated.text': 'last_modified',
        'content.text': 'content'
    }

    def __init__(self):
        WebDataSource.__init__(self)
        self.youtube_service = YouTubeService()

    def search(self, search_terms, location=None,
               max_results=MAX_RESULTS_PER_QUERY, max_age=None,
               orderby='published', max_comment_count=0):
        """
        Searches for youtube videos.

        @param search_terms: list (or tuple/set) of search terms
        @param location: tuple latitude, longitude, e.g.
                         37.42307,-122.08427
        @param max_results: maximum number of results to return in total
        @param max_age: datetime of the oldest entry (or age in minutes)
        @param orderby: order search results by (relevance, published,
                        viewCount, rating)
        @param max_comment_count: maximum number of comments to fetch
                                  (default: 0)
        @return: the result of search_youtube for the assembled query
        @raise ValueError: if search_terms is not a list, tuple or set
        """
        if not isinstance(search_terms, (list, tuple, set)):
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation in the message.
            raise ValueError(
                "Warning search requires a list of search terms, "
                "rather than a single term")

        # all youtube search parameter are here:
        # https://developers.google.com/youtube/2.0/reference?hl=de#Custom_parameters
        query = YouTubeVideoQuery()
        query.vq = ', '.join(search_terms)
        query.orderby = orderby
        query.racy = 'include'
        query.time = self.get_query_time(max_age)
        # Per-request size; the overall cap (max_results) is enforced while
        # paging in search_youtube.
        query.max_results = MAX_RESULTS_PER_QUERY

        if location:
            query.location = location

        return self.search_youtube(query, max_results, max_comment_count)

    @classmethod
    def get_query_time(cls, max_age):
        '''
        converts a datetime or int (age in minutes) to the youtube specific
        query parameter (e.g. this_month, today ...)

        @param max_age: int or datetime object
        @return: youtube specific query_time: 'all_time' when max_age is
                 falsy, 'today' up to 1440 minutes, 'this_week' up to
                 10080 minutes, and 'this_month' for anything older
        '''
        if not max_age:
            return 'all_time'

        if isinstance(max_age, datetime):
            # convert datetime to minutes
            max_age = (datetime.now() - max_age).total_seconds() / 60

        if max_age <= 1440:        # one day
            return 'today'
        if max_age <= 10080:       # seven days
            return 'this_week'
        return 'this_month'

    def search_youtube(self, query, max_results=MAX_RESULTS_PER_QUERY,
                       max_comment_count=0):
        '''
        executes the youtube query and facilitates paging of the resultset

        Entries whose conversion raises are logged and skipped.

        @param query: YouTubeVideoQuery
        @param max_results: stop as soon as this many entries were collected
        @param max_comment_count: maximum number of comments to fetch
        @return: list of converted entries (as returned by
                 convert_feed_entry)
        '''
        result = []
        feed = self.youtube_service.YouTubeQuery(query)

        while feed:
            for entry in feed.entry:
                try:
                    yt_dict = self.convert_feed_entry(entry,
                                                      max_comment_count)
                except Exception as e:
                    # Best effort: one broken entry must not abort the
                    # whole search.
                    logger.exception('Exception converting entry: %s', e)
                else:
                    result.append(yt_dict)

                if len(result) == max_results:
                    return result

            next_link = feed.GetNextLink()
            if not next_link:
                break

            feed = self.youtube_service.GetYouTubeVideoFeed(next_link.href)

        return result