Python scrape_videos示例

编程语言: Python

命名空间/包名称: steve.util

方法/功能: scrape_videos

hotexamples.com的示例: 3

Python scrape_videos - 共找到3个示例。以下是从开源项目中提取的、评价最高的 steve.util.scrape_videos 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： cmdline.py 项目： CarlFK/steve

def fetch_cmd(cfg, parser, parsed, args):
    """Scrape the project's url and write one JSON file per video found.

    Reads ``projectpath`` and ``url`` from the ``[project]`` section of
    ``cfg``, scrapes the url with ``scrape_videos`` and writes each
    resulting video into ``<projectpath>/json``.  A video whose
    ``source_url`` already appears in the files returned by
    ``load_json_files`` is skipped unless ``parsed.force`` is set.

    :arg cfg: config object; ``cfg.get(section, option)`` is used
    :arg parser: command line parser; only ``print_byline()`` is called
    :arg parsed: parsed options; ``quiet`` and ``force`` are read
    :arg args: not referenced by this command

    :returns: 0 on success, 1 when no url is configured
    """
    if not parsed.quiet:
        parser.print_byline()

    projectpath = cfg.get('project', 'projectpath')
    jsonpath = os.path.join(projectpath, 'json')

    # source_url -> filename
    source_map = dict(
        (item['source_url'], fn)
        for fn, item in load_json_files(cfg)
    )

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        url = ''

    if not url:
        err('url not specified in steve.ini project config file.')
        err('Add "url = ..." to [project] section of steve.ini file.')
        return 1

    out('Scraping {0}...'.format(url))
    videos = scrape_videos(url)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print('Found {0} videos...'.format(len(videos)))
    for i, video in enumerate(videos):
        if video['source_url'] in source_map and not parsed.force:
            print('Skipping {0}... already exists.'.format(
                stringify(video['title'])))
            continue

        filename = generate_filename(video['title'])
        filename = '{index:04d}_{basename}.json'.format(
            index=i, basename=filename[:40])

        print('Working on {0}... ({1})'.format(
            stringify(video['title']), filename))

        # Write into the project's json directory (the one created above)
        # rather than a 'json' directory relative to the current working
        # directory, which only works when run from the project path.
        with open(os.path.join(jsonpath, filename), 'w') as fp:
            fp.write(convert_to_json(video))

        # TODO: what if there's a file there already? on the first one,
        # prompt the user whether to stomp on existing files or skip.
    return 0

示例#2

显示文件

文件： cmdline.py 项目： AvdN/steve

def fetch(cfg, ctx, quiet, force):
    """Scrape the configured project url and save each video as JSON.

    Reads ``jsonpath`` and ``url`` from the ``[project]`` section of
    ``cfg``, creating the ``jsonpath`` directory if it does not exist.
    Every scraped video is written to its own file in that directory; a
    video whose ``source_url`` is already present in the files returned
    by ``load_json_files`` is skipped unless ``force`` is truthy.

    :arg cfg: config object; ``cfg.get(section, option)`` is used
    :arg ctx: not referenced in this function
    :arg quiet: when falsy, ``VERSION`` is echoed first
    :arg force: when truthy, already-known videos are written again

    :raises click.ClickException: if no url is configured
    """
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # Map each already-known source_url to the JSON file it was loaded from.
    known_sources = {
        data['source_url']: json_file
        for json_file, data in load_json_files(cfg)
    }

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        project_url = cfg.get('project', 'url')
    except NoOptionError:
        # A missing option is treated the same as an empty one.
        project_url = ''

    if not project_url:
        message = (
            u'url not specified in {0} project config file.\n\n'
            u'Add "url = ..." to [project] section of {0} file.'
        ).format(get_project_config_file_name())
        raise click.ClickException(message)

    click.echo(u'Scraping {0}...'.format(project_url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    scraped = scrape_videos(project_url)

    click.echo(u'Found {0} videos...'.format(len(scraped)))
    for position, entry in enumerate(scraped):
        already_known = entry['source_url'] in known_sources
        if already_known and not force:
            skipped_title = stringify(entry['title'])
            click.echo(u'Skipping {0}... already exists.'.format(skipped_title))
            continue

        # The position in the scrape results prefixes the name; the
        # title-derived part is capped at 40 characters.
        basename = generate_filename(entry['title'])[:40]
        filename = '{index:04d}_{basename}.json'.format(
            index=position, basename=basename)

        shown_title = stringify(entry['title'])
        click.echo(u'Created {0}... ({1})'.format(shown_title, filename))

        target = os.path.join(jsonpath, filename)
        with open(target, 'w') as fp:
            fp.write(convert_to_json(entry))

示例#3

显示文件

文件： cmdline.py 项目： jethar/steve

def fetch(cfg, ctx, quiet, force):
    """Fetch videos from the project url and generate a JSON file for each.

    The ``[project]`` section of ``cfg`` supplies ``jsonpath`` (created
    when missing) and ``url``.  Videos returned by ``scrape_videos`` are
    written one per file into ``jsonpath``.  Videos whose ``source_url``
    already appears in the output of ``load_json_files`` are skipped,
    unless ``force`` is truthy.

    :arg cfg: config object; ``cfg.get(section, option)`` is used
    :arg ctx: not referenced in this function
    :arg quiet: when falsy, ``VERSION`` is echoed first
    :arg force: when truthy, already-known videos are written again

    :raises click.ClickException: if no url is configured
    """
    if not quiet:
        click.echo(VERSION)

    jsonpath = cfg.get('project', 'jsonpath')

    # Lookup table: source_url -> filename it was loaded from.
    existing = {}
    for loaded_name, loaded_item in load_json_files(cfg):
        existing[loaded_item['source_url']] = loaded_name

    if not os.path.exists(jsonpath):
        os.makedirs(jsonpath)

    try:
        scrape_url = cfg.get('project', 'url')
    except ConfigParser.NoOptionError:
        # No "url" option at all is handled like an empty value below.
        scrape_url = ''

    if not scrape_url:
        config_name = get_project_config_file_name()
        raise click.ClickException(
            (u'url not specified in {0} project config file.\n\n'
             u'Add "url = ..." to [project] section of {0} file.').format(
                config_name)
        )

    click.echo(u'Scraping {0}...'.format(scrape_url))
    click.echo(u'(This can take a *long* time with no indication of progress.)')
    found = scrape_videos(scrape_url)

    click.echo(u'Found {0} videos...'.format(len(found)))
    for number, item in enumerate(found):
        if item['source_url'] in existing and not force:
            click.echo(
                u'Skipping {0}... already exists.'.format(
                    stringify(item['title'])))
        else:
            # Name is the zero-padded result index plus at most 40
            # characters of the title-derived filename.
            stem = generate_filename(item['title'])
            filename = '{index:04d}_{basename}.json'.format(
                index=number, basename=stem[:40])

            click.echo(
                u'Created {0}... ({1})'.format(
                    stringify(item['title']), filename))

            destination = os.path.join(jsonpath, filename)
            with open(destination, 'w') as fp:
                fp.write(convert_to_json(item))