示例#1
0
    def scrape(self, url):
        """Scrape a url by passing it through youtube-dl.

        Runs ``youtube-dl -j <url>``, decodes every non-blank line of its
        standard output as one JSON document and passes each decoded
        object through ``self.transform_item``.

        :arg url: the url to scrape

        :returns: ``None`` when ``is_youtube(url)`` is false, otherwise the
            list of transformed items

        :raises ScraperError: if youtube-dl cannot be started or exits
            with a non-zero status

        """
        if not is_youtube(url):
            return

        # FIXME: Sometimes youtube-dl takes a *long* time to run. This
        # needs to give indication of progress.
        try:
            # Keep stderr out of the stream that gets parsed: anything
            # youtube-dl writes there is diagnostic text, not JSON, and
            # would make json.loads fail if it were mixed into stdout.
            proc = subprocess.Popen(
                ['youtube-dl', '-j', url],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        except OSError:
            raise ScraperError('youtube-dl not installed or not on PATH.')

        # communicate() drains both pipes, so a chatty stderr cannot
        # block the child process.
        output, errors = proc.communicate()

        if proc.returncode:
            # Prefer the diagnostics from stderr; fall back to stdout if
            # youtube-dl failed without writing anything there.
            raise ScraperError(
                'youtube-dl said "{0}".'.format(errors or output))

        # Each non-blank line is a single JSON object.
        items = [
            json.loads(line)
            for line in output.splitlines()
            if line.strip()
        ]

        return [self.transform_item(item) for item in items]
示例#2
0
def test_is_youtube():
    """Check is_youtube against urls paired with the result each should give."""
    cases = (
        ('http://www.youtube.com/watch?v=N29XAFjiKf4', True),
        ('http://youtu.be/N29XAFjiKf4', True),
    )

    for case in cases:
        candidate, wanted = case
        assert is_youtube(candidate) == wanted
示例#3
0
def test_is_youtube():
    """Verify is_youtube returns the expected value for each listed url."""
    expectations = [
        ('http://www.youtube.com/watch?v=N29XAFjiKf4', True),
        ('http://youtu.be/N29XAFjiKf4', True),
    ]

    for link, wanted in expectations:
        actual = is_youtube(link)
        eq_(actual, wanted)
示例#4
0
    def scrape(self, url):
        """Scrape a url by passing it through youtube-dl.

        Runs ``youtube-dl -j <url>``, decodes every non-blank line of its
        standard output as one JSON document and passes each decoded
        object through ``self.transform_item``.

        :arg url: the url to scrape

        :returns: ``None`` when ``is_youtube(url)`` is false, otherwise the
            list of transformed items

        :raises ScraperError: if youtube-dl cannot be started or exits
            with a non-zero status

        """
        if not is_youtube(url):
            return

        # FIXME: Sometimes youtube-dl takes a *long* time to run. This
        # needs to give indication of progress.
        try:
            # Keep stderr out of the stream that gets parsed: anything
            # youtube-dl writes there is diagnostic text, not JSON, and
            # would make json.loads fail if it were mixed into stdout.
            proc = subprocess.Popen(['youtube-dl', '-j', url],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            raise ScraperError('youtube-dl not installed or not on PATH.')

        # communicate() drains both pipes, so a chatty stderr cannot
        # block the child process.
        output, errors = proc.communicate()

        if proc.returncode:
            # Prefer the diagnostics from stderr; fall back to stdout if
            # youtube-dl failed without writing anything there.
            raise ScraperError(
                'youtube-dl said "{0}".'.format(errors or output))

        # Each non-blank line is a single JSON object.
        items = [
            json.loads(line)
            for line in output.splitlines()
            if line.strip()
        ]

        return [self.transform_item(item) for item in items]