def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE):
    '''Return a set of popular Dailymotion video links.

    Charts pages are built from CHARTS_URL using the module-level COUNTRY,
    the time frame obtained from get_time_frame(input_time) and a 1-based
    page number.  Every link containing '/video/' is kept; links starting
    with '/' are turned into absolute links by prefixing DM_LINK.

    Arguments:
        input_time: value handed to get_time_frame to select the chart
            period.
        max_results: number of links after which collection stops.

    Returns:
        A set of links accumulated over all fetched pages (hence in no
        particular order).  The set is empty when no page yields a
        matching link.
    '''
    config_pytomo.LOG.debug('Getting popular links per country')
    time_frame = get_time_frame(input_time)
    if max_results > MAX_VIDEO_PER_PAGE:
        # Ceiling division: a partially needed last page must be fetched
        # too, otherwise fewer than max_results links can be collected.
        # '//' keeps the result an int on both Python 2 and Python 3.
        pages = -(-int(max_results) // MAX_VIDEO_PER_PAGE)
    else:
        pages = 1
    # Created once, before the page loop, so that the links of earlier
    # pages are not thrown away when the next page is processed.
    popular_links = set()
    for page in range(pages):
        url = CHARTS_URL.format(country=COUNTRY, time_frame=time_frame,
                                page=(page + 1))
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Nothing to iterate on (possibly None): try the next page.
            continue
        for link in links:
            if '/video/' not in link:
                continue
            if link.startswith('/'):
                link = ''.join((DM_LINK, link))
            popular_links.add(link)
            if len(popular_links) >= max_results:
                # Enough links: stop here instead of fetching more pages.
                return popular_links
    return popular_links
def get_youtube_links(url, max_per_page):
    """Return a set of only Youtube links from url.

    Arguments:
        url: page to extract links from; it must contain 'youtube' or
            'youtu.be', otherwise an error is logged and an empty set is
            returned.
        max_per_page: number of links after which collection stops.

    Returns:
        A set of links containing '/watch'.  Links starting with '/' are
        made absolute with the 'https://www.youtube.com' prefix.
    """
    if not ('youtube' in url or 'youtu.be' in url):
        config_pytomo.LOG.error("Only youtube is implemented, got url: %s"
                                % url)
        # Same type as the normal return value, so callers can apply set
        # operations without special-casing the error path.
        return set()
    # Guard against an extractor returning None instead of an empty
    # container: len() and the loop below would otherwise raise.
    links = lib_links_extractor.get_all_links(url) or []
    youtube_links = set()
    config_pytomo.LOG.info("Found %d links for url %s" % (len(links), url))
    for link in links:
        if "/watch" not in link:
            continue
        if link.startswith('/'):
            # https, consistent with the other get_youtube_links
            # definition in this file.
            link = ''.join(("https://www.youtube.com", link))
        youtube_links.add(link)
        if len(youtube_links) >= max_per_page:
            break
    config_pytomo.LOG.info("Found %d related video links for url %s"
                           % (len(youtube_links), url))
    return youtube_links
def get_youtube_links(url, max_per_page):
    """Return a set of only Youtube links from url.

    Arguments:
        url: page whose links are extracted.  When it contains neither
            'youtube' nor 'youtu.be' an error is logged and the result is
            an empty set.
        max_per_page: collection stops once this many links were kept.

    Returns:
        The set of extracted links that contain '/watch'; site-relative
        ones (leading '/') are prefixed with 'https://www.youtube.com'.
    """
    is_youtube_url = 'youtube' in url or 'youtu.be' in url
    if not is_youtube_url:
        config_pytomo.LOG.error("Only youtube is implemented, got url: %s"
                                % url)
        # An empty set (not a list) keeps the return type uniform.
        return set()
    links = lib_links_extractor.get_all_links(url)
    if not links:
        # Covers a possible None from the extractor, which would make
        # len() and the iteration below fail.
        links = []
    config_pytomo.LOG.info("Found %d links for url %s" % (len(links), url))
    youtube_links = set()
    for link in links:
        if "/watch" in link:
            if link.startswith('/'):
                link = ''.join(("https://www.youtube.com", link))
            youtube_links.add(link)
            if len(youtube_links) >= max_per_page:
                break
    config_pytomo.LOG.info("Found %d related video links for url %s"
                           % (len(youtube_links), url))
    return youtube_links
def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE,
                      country=GLOBAL_COUNTRY):
    '''Return a set of popular Youtube video links.

    Charts pages are built from CHARTS_URL using country, the time frame
    obtained from get_time_frame(input_time) and a 1-based page number.
    Two kinds of links are collected from each page:
      - links containing '/watch?v=' (made absolute when they start
        with '/');
      - the links returned by parse_watch_videos_link for every link
        matching LINK_REG_EXP.

    Arguments:
        input_time: value handed to get_time_frame to select the chart
            period.
        max_results: number of links after which collection stops.
        country: country used in the charts URL; a false value falls
            back to GLOBAL_COUNTRY.

    Returns:
        A set of links accumulated over all fetched pages (hence in no
        particular order), possibly empty.
    '''
    config_pytomo.LOG.debug('Getting popular links')
    if not country:
        country = GLOBAL_COUNTRY
    time_frame = get_time_frame(input_time)
    if max_results > MAX_VIDEO_PER_PAGE:
        # Ceiling division: a partially needed last page must be fetched
        # too.  '//' keeps the result an int on Python 2 and Python 3.
        pages = -(-int(max_results) // MAX_VIDEO_PER_PAGE)
    else:
        pages = 1
    # Loop invariants: compile the pattern once and keep one result set
    # for all pages so that earlier pages are not discarded.
    matcher = re.compile(LINK_REG_EXP)
    popular_links = set()
    enough = False
    for page in range(pages):
        url = CHARTS_URL.format(country=country, time_frame=time_frame,
                                page=(page + 1))
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Nothing to iterate on (possibly None): try the next page.
            continue
        for link in links:
            config_pytomo.LOG.debug('found link: %s' % link)
            if '/watch?v=' in link:
                if link.startswith('/'):
                    # NOTE(review): get_youtube_links in this file builds
                    # https:// links; http:// is kept here because the
                    # scheme produced by parse_watch_videos_link is not
                    # visible -- confirm and align.
                    link = ''.join(("http://www.youtube.com", link))
                popular_links.add(link)
                if len(popular_links) >= max_results:
                    enough = True
                    break
            # search() returns a match object or None: test its truth
            # value.  Comparing it with 0 relied on Python 2 cross-type
            # ordering and raises TypeError on Python 3.
            if matcher.search(link):
                config_pytomo.LOG.debug('videos_ids input link: %s' % link)
                for video_link in parse_watch_videos_link(link):
                    popular_links.add(video_link)
                    if len(popular_links) >= max_results:
                        enough = True
                        break
                if enough:
                    break
        if enough:
            # Do not fetch further pages once max_results is reached.
            break
    config_pytomo.LOG.debug('popular links are: %s' % popular_links)
    return popular_links