Пример #1
0
def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE):
    '''Return a set of popular dailymotion video links.

    The charts pages are requested through CHARTS_URL, filled in with the
    module-level COUNTRY, the time frame derived from *input_time* by
    get_time_frame() and a 1-based page number.

    Only links containing '/video/' are kept; links starting with '/' are
    made absolute by prefixing DM_LINK.  Links are accumulated over all
    requested pages and collection stops as soon as *max_results* links
    have been gathered.  (The results returned are in no particular order.)
    '''
    config_pytomo.LOG.debug('Getting popular links per country')
    time_frame = get_time_frame(input_time)
    # Round the page count up so that a partially filled last page is still
    # requested, and always fetch at least one page.
    per_page = MAX_VIDEO_PER_PAGE
    pages = max(1, (int(max_results) + per_page - 1) // per_page)
    # The result set lives outside the page loop: creating it per page
    # would throw away everything collected on the previous pages.
    popular_links = set()
    for page in range(pages):
        url = CHARTS_URL.format(country=COUNTRY,
                                time_frame=time_frame,
                                page=(page + 1))
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Nothing to scan on this page (also guards against None).
            continue
        for link in links:
            if '/video/' not in link:
                continue
            if link.startswith('/'):
                link = ''.join((DM_LINK, link))
            popular_links.add(link)
            if len(popular_links) >= max_results:
                # Enough links: stop scanning and stop fetching pages.
                return popular_links
    return popular_links
Пример #2
0
def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE):
    '''Collect popular dailymotion video links from the charts pages.

    Each charts page URL is produced from CHARTS_URL with the module-level
    COUNTRY, the time frame returned by get_time_frame(input_time) and a
    page number starting at 1.

    A link is kept when it contains '/video/'; a link starting with '/' is
    turned into an absolute one by prepending DM_LINK.  The returned set
    gathers the links of every requested page and holds at most
    *max_results* entries (whenever *max_results* is at least 1).  The
    set is unordered.
    '''
    config_pytomo.LOG.debug('Getting popular links per country')
    time_frame = get_time_frame(input_time)
    # Ceiling division: a remainder of results needs one more page.  At
    # least one page is always requested.
    wanted = int(max_results)
    pages = max(1, -(-wanted // MAX_VIDEO_PER_PAGE))
    # Shared by all pages so that earlier pages are not discarded.
    popular_links = set()
    for page_number in range(1, pages + 1):
        url = CHARTS_URL.format(country=COUNTRY, time_frame=time_frame,
                                page=page_number)
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Skip pages that yielded nothing (empty or None result).
            continue
        for link in links:
            if '/video/' in link:
                if link.startswith('/'):
                    link = ''.join((DM_LINK, link))
                popular_links.add(link)
                if len(popular_links) >= max_results:
                    # Cap reached: no need to look at further pages.
                    return popular_links
    return popular_links
Пример #3
0
def get_youtube_links(url, max_per_page):
    """Return a set of Youtube video links found on the page at *url*.

    Only links containing "/watch" are kept; links starting with '/' are
    made absolute by prefixing "http://www.youtube.com".  Collection stops
    as soon as *max_per_page* links have been gathered.

    If *url* contains neither 'youtube' nor 'youtu.be', an error is logged
    and an empty set is returned, so that every code path yields a set.
    """
    if 'youtube' not in url and 'youtu.be' not in url:
        config_pytomo.LOG.error("Only youtube is implemented, got url: %s"
                                % url)
        # Same type as the success path (was an empty list).
        return set()
    links = lib_links_extractor.get_all_links(url)
    youtube_links = set()
    config_pytomo.LOG.info("Found %d links for url %s" % (len(links), url))
    for link in links:
        if "/watch" not in link:
            continue
        if link.startswith('/'):
            link = ''.join(("http://www.youtube.com", link))
        youtube_links.add(link)
        if len(youtube_links) >= max_per_page:
            break
    config_pytomo.LOG.info("Found %d related video links for url %s"
                            % (len(youtube_links), url))
    return youtube_links
Пример #4
0
def get_youtube_links(url, max_per_page):
    """Extract the Youtube video links present on the page at *url*.

    A link is retained when it contains "/watch"; a link starting with '/'
    is completed with the "https://www.youtube.com" prefix.  At most
    *max_per_page* links are collected (whenever *max_per_page* is at
    least 1), and the result is a set.

    When *url* contains neither 'youtube' nor 'youtu.be', the problem is
    logged as an error and an empty set is returned, keeping the return
    type identical on every path.
    """
    is_youtube = 'youtube' in url or 'youtu.be' in url
    if not is_youtube:
        config_pytomo.LOG.error("Only youtube is implemented, got url: %s" %
                                url)
        # Empty set rather than empty list: callers always receive a set.
        return set()
    links = lib_links_extractor.get_all_links(url)
    youtube_links = set()
    config_pytomo.LOG.info("Found %d links for url %s" % (len(links), url))
    for link in links:
        if "/watch" in link:
            if link.startswith('/'):
                link = ''.join(("https://www.youtube.com", link))
            youtube_links.add(link)
            if len(youtube_links) >= max_per_page:
                break
    config_pytomo.LOG.info("Found %d related video links for url %s" %
                           (len(youtube_links), url))
    return youtube_links
Пример #5
0
def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE,
                      country=GLOBAL_COUNTRY):
    '''Return a set of popular Youtube video links.

    The charts pages are requested through CHARTS_URL, filled in with
    *country* (GLOBAL_COUNTRY when empty), the time frame derived from
    *input_time* by get_time_frame() and a 1-based page number.

    Two kinds of links contribute to the result:
    - links containing '/watch?v=' are added directly (links starting
      with '/' are prefixed with "http://www.youtube.com");
    - links matching LINK_REG_EXP are expanded into individual video
      links by parse_watch_videos_link().

    Links are accumulated over all requested pages and collection stops as
    soon as *max_results* links have been gathered.  (The results returned
    are in no particular order.)
    '''
    config_pytomo.LOG.debug('Getting popular links')
    if not country:
        country = GLOBAL_COUNTRY
    time_frame = get_time_frame(input_time)
    # Round the page count up so that a partially filled last page is still
    # requested, and always fetch at least one page.
    per_page = MAX_VIDEO_PER_PAGE
    pages = max(1, (int(max_results) + per_page - 1) // per_page)
    # Both the result set and the compiled pattern are created once: a set
    # created per page would discard the links of the previous pages.
    popular_links = set()
    matcher = re.compile(LINK_REG_EXP)
    # Set once max_results is reached so that every enclosing loop stops
    # while the final debug message is still emitted.
    reached_cap = False
    for page in range(pages):
        url = CHARTS_URL.format(country=country,
                                time_frame=time_frame,
                                page=(page + 1))
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Nothing to scan on this page (also guards against None).
            continue
        for link in links:
            config_pytomo.LOG.debug('found link: %s' % link)
            if '/watch?v=' in link:
                if link.startswith('/'):
                    link = ''.join(("http://www.youtube.com", link))
                popular_links.add(link)
                if len(popular_links) >= max_results:
                    reached_cap = True
                    break
            # search() returns a match object or None; test it explicitly
            # instead of comparing it with an integer.
            if matcher.search(link) is not None:
                config_pytomo.LOG.debug('videos_ids input link: %s' % link)
                video_links = parse_watch_videos_link(link)
                for video_link in video_links:
                    popular_links.add(video_link)
                    if len(popular_links) >= max_results:
                        reached_cap = True
                        break
                if reached_cap:
                    break
        if reached_cap:
            break
    config_pytomo.LOG.debug('popular links are: %s' % popular_links)
    return popular_links
Пример #6
0
def get_popular_links(input_time=config_pytomo.TIME_FRAME,
                      max_results=config_pytomo.MAX_PER_PAGE,
                      country=GLOBAL_COUNTRY):
    '''Collect popular Youtube video links from the charts pages.

    Each charts page URL is produced from CHARTS_URL with *country*
    (replaced by GLOBAL_COUNTRY when empty), the time frame returned by
    get_time_frame(input_time) and a page number starting at 1.

    For every link found on a page:
    - if it contains '/watch?v=' it is added to the result, after being
      prefixed with "http://www.youtube.com" when it starts with '/';
    - if it matches LINK_REG_EXP it is handed to
      parse_watch_videos_link() and every link returned is added.

    The returned set gathers the links of every requested page and holds
    at most *max_results* entries (whenever *max_results* is at least 1).
    The set is unordered.
    '''
    config_pytomo.LOG.debug('Getting popular links')
    if not country:
        country = GLOBAL_COUNTRY
    time_frame = get_time_frame(input_time)
    # Ceiling division: a remainder of results needs one more page.  At
    # least one page is always requested.
    wanted = int(max_results)
    pages = max(1, -(-wanted // MAX_VIDEO_PER_PAGE))
    # Created once for all pages: the set must survive from one page to
    # the next and the pattern does not change between pages.
    popular_links = set()
    matcher = re.compile(LINK_REG_EXP)
    # Becomes True when the cap is hit; lets the nested loops unwind while
    # still reaching the final debug message.
    enough = False
    for page_number in range(1, pages + 1):
        url = CHARTS_URL.format(country=country, time_frame=time_frame,
                                page=page_number)
        links = lib_links_extractor.get_all_links(url)
        if not links:
            config_pytomo.LOG.warning('No popular link was found')
            # Skip pages that yielded nothing (empty or None result).
            continue
        for link in links:
            config_pytomo.LOG.debug('found link: %s' % link)
            if '/watch?v=' in link:
                if link.startswith('/'):
                    link = ''.join(("http://www.youtube.com", link))
                popular_links.add(link)
                if len(popular_links) >= max_results:
                    enough = True
                    break
            # A match object means "found", None means "not found"; do
            # not rely on ordering comparisons against an integer.
            if matcher.search(link) is not None:
                config_pytomo.LOG.debug('videos_ids input link: %s' % link)
                for video_link in parse_watch_videos_link(link):
                    popular_links.add(video_link)
                    if len(popular_links) >= max_results:
                        enough = True
                        break
                if enough:
                    break
        if enough:
            break
    config_pytomo.LOG.debug('popular links are: %s' % popular_links)
    return popular_links