示例#1
0
def pull_cmd(cfg, parser, parsed, args):
    """Pull every video of the configured category and save it as JSON.

    Reads ``username``, ``api_url`` and ``category`` from ``cfg``, looks the
    category up by title through the REST API, fetches each video the
    category lists and passes the collected data to ``save_json_files``.

    :param cfg: project configuration object
    :param parser: command line parser; its byline is printed unless
        ``parsed.quiet`` is set
    :param parsed: parsed command line options (``quiet`` and ``apikey``
        are read)
    :param args: not used by this command

    :returns: 0 on success; 1 if a required setting or the api key is
        missing, or if the category does not exist
    """
    if not parsed.quiet:
        parser.print_byline()

    username = get_from_config(cfg, 'username')
    api_url = get_from_config(cfg, 'api_url')
    cat_title = get_from_config(cfg, 'category')

    # Command line api_key overrides config-set api_key
    api_key = parsed.apikey
    if not api_key:
        try:
            api_key = cfg.get('project', 'api_key')
        except ConfigParser.NoOptionError:
            pass
    if not api_key:
        err('Specify an api key either in steve.ini, on command line, '
            'or in API_KEY file.')
        return 1

    if not username or not api_url or not cat_title or not api_key:
        return 1

    api = steve.restapi.API(api_url)

    # limit=0 is passed through to the API as-is -- presumably it disables
    # paging so all categories come back in one response; TODO confirm.
    all_categories = steve.restapi.get_content(
        api.category.get(username=username, api_key=api_key,
                         limit=0))
    cat = [cat_item for cat_item in all_categories['objects']
           if cat_item['title'] == cat_title]

    if not cat:
        err('Category "{0}" does not exist.'.format(cat_title))
        return 1

    # Get the category from the list of 1.
    cat = cat[0]

    out('Retrieved category.')

    data = []

    for counter, video_url in enumerate(cat['videos']):
        # Lame, but good enough for now.
        # NOTE(review): takes the second-to-last path segment, so this
        # assumes the video url ends with "<id>/" -- confirm.
        video_id = video_url.split('/')[-2]

        video_data = steve.restapi.get_content(
            api.video(video_id).get(username=username,
                                    api_key=api_key))

        out('Working on "{0}"'.format(video_data['slug']))

        # Nix some tastypie bits from the data.
        for bad_key in ('resource_uri',):
            if bad_key in video_data:
                del video_data[bad_key]

        # Add id.
        video_data['id'] = video_id

        # Zero-pad the counter: a plain "4d" pads with spaces, which puts
        # blanks into the file name and breaks lexicographic ordering.
        fn = 'json/{0:04d}_{1}.json'.format(counter, video_data['slug'])
        data.append((fn, video_data))

    out('Saving files....')
    save_json_files(cfg, data)

    return 0
        }
        return item



if __name__ == '__main__':
    # Script entry point: scrape either the configured YouTube channel or
    # the configured playlist and save the result as JSON files.
    parser = argparse.ArgumentParser()
    # type=int so a value given on the command line has the same type as
    # the integer default instead of arriving as a string.
    parser.add_argument("--maxresults", help="Max results", type=int, default=50)
    # Both options are boolean switches; the ids themselves are read from
    # the project config below, not from the command line.
    parser.add_argument("-c", "--channel", action='store_true', help="YouTube channel id")
    parser.add_argument("-p", "--playlist", action='store_true',  help="YouTube playlist id")
    args = parser.parse_args()

    cfg = get_project_config()

    scraper = YouTubeScraper(cfg, max_results=args.maxresults)

    # --channel takes precedence when both switches are given.
    if args.channel:
        channel_id = get_from_config(cfg, 'channel_id', 'youtube')
        print("scraping channel {}".format(channel_id))
        data = scraper.scrape_channel(channel_id)
        save_json_files(cfg, data)

    elif args.playlist:
        playlist_id = get_from_config(cfg, 'playlist_id', 'youtube')
        print("scraping playlist {}".format(playlist_id))
        data = scraper.scrape_playlist(playlist_id)
        save_json_files(cfg, data)

    else:
        print("nothing to do. no channel or playlist requested")
示例#3
0
def pull_cmd(cfg, parser, parsed, args):
    """Fetch the configured category's videos and write them out as JSON.

    The category is located by comparing the ``category`` config value with
    the title of each category returned by the REST API. Every video url in
    that category is then fetched individually and the results are handed
    to ``save_json_files``.

    :param cfg: project configuration object
    :param parser: command line parser; its byline is printed unless
        ``parsed.quiet`` is set
    :param parsed: parsed command line options (``quiet`` and ``apikey``
        are read)
    :param args: not used by this command

    :returns: 0 on success; 1 if a required setting or the api key is
        missing, or if the category does not exist
    """
    if not parsed.quiet:
        parser.print_byline()

    username = get_from_config(cfg, 'username')
    api_url = get_from_config(cfg, 'api_url')
    cat_title = get_from_config(cfg, 'category')

    # Command line api_key overrides config-set api_key
    api_key = parsed.apikey
    if not api_key:
        try:
            api_key = cfg.get('project', 'api_key')
        except ConfigParser.NoOptionError:
            pass
    if not api_key:
        err('Specify an api key either in steve.ini, on command line, '
            'or in API_KEY file.')
        return 1

    if not username or not api_url or not cat_title or not api_key:
        return 1

    api = steve.restapi.API(api_url)

    all_categories = steve.restapi.get_content(
        api.category.get(username=username, api_key=api_key, limit=0))
    cat = [
        cat_item for cat_item in all_categories['objects']
        if cat_item['title'] == cat_title
    ]

    if not cat:
        err('Category "{0}" does not exist.'.format(cat_title))
        return 1

    # Get the category from the list of 1.
    cat = cat[0]

    out('Retrieved category.')

    data = []

    for counter, video_url in enumerate(cat['videos']):
        # Lame, but good enough for now.
        # NOTE(review): picks the second-to-last path segment, i.e. assumes
        # the url has a trailing slash after the id -- confirm.
        video_id = video_url.split('/')[-2]

        video_data = steve.restapi.get_content(
            api.video(video_id).get(username=username, api_key=api_key))

        out('Working on "{0}"'.format(video_data['slug']))

        # Nix some tastypie bits from the data.
        for bad_key in ('resource_uri', ):
            if bad_key in video_data:
                del video_data[bad_key]

        # Add id.
        video_data['id'] = video_id

        # Use zero padding ("04d"); the previous "4d" padded with spaces and
        # produced file names with embedded blanks.
        fn = 'json/{0:04d}_{1}.json'.format(counter, video_data['slug'])
        data.append((fn, video_data))

    out('Saving files....')
    save_json_files(cfg, data)

    return 0
示例#4
0
文件: cmdline.py 项目: AvdN/steve
def pull(cfg, ctx, quiet, apikey):
    """Pulls data from a richard instance.

    Looks up the category named by the ``category`` config value, fetches
    every video it lists through the REST API and passes the collected
    data to ``save_json_files``.

    :param cfg: project configuration object
    :param ctx: not used by this command
    :param quiet: when true, the version banner is not printed
    :param apikey: api key from the command line; falls back to the
        ``api_key`` option of the ``project`` config section when empty

    :raises click.ClickException: if the api key or another required
        setting is missing, or if the category does not exist
    """
    if not quiet:
        click.echo(VERSION)

    username = get_from_config(cfg, 'username')
    api_url = get_from_config(cfg, 'api_url')
    cat_title = get_from_config(cfg, 'category')

    # Command line api_key overrides config-set api_key
    if not apikey:
        try:
            apikey = cfg.get('project', 'api_key')
        except NoOptionError:
            pass
    if not apikey:
        raise click.ClickException(
            u'Specify an api key either in {0}, on command line, '
            u'or in API_KEY file.'.format(get_project_config_file_name())
        )

    if not username or not api_url or not cat_title or not apikey:
        # The message names every setting the condition checks, including
        # the category.
        raise click.ClickException(
            u'Missing username, api_url, category or api_key.')

    api = steve.restapi.API(api_url)

    all_categories = steve.restapi.get_content(
        api.category.get(username=username, api_key=apikey,
                         limit=0))
    cat = [cat_item for cat_item in all_categories['objects']
           if cat_item['title'] == cat_title]

    if not cat:
        raise click.ClickException(u'Category "{0}" does not exist.'.format(cat_title))

    # Get the category from the list of 1.
    cat = cat[0]

    click.echo('Retrieved category.')

    data = []

    for counter, video_url in enumerate(cat['videos']):
        # Lame, but good enough for now.
        # NOTE(review): assumes the url ends with "<id>/" so the id is the
        # second-to-last segment -- confirm.
        video_id = video_url.split('/')[-2]

        video_data = steve.restapi.get_content(
            api.video(video_id).get(username=username,
                                    api_key=apikey))

        click.echo('Working on "{0}"'.format(video_data['slug']))

        # Nix some tastypie bits from the data.
        for bad_key in ('resource_uri',):
            if bad_key in video_data:
                del video_data[bad_key]

        # Add id.
        video_data['id'] = video_id

        # Zero-pad the counter; "4d" would pad with spaces and put blanks
        # into the file name.
        fn = 'json/{0:04d}_{1}.json'.format(counter, video_data['slug'])
        data.append((fn, video_data))

    click.echo('Saving files....')
    save_json_files(cfg, data)
# Video urls of the previously loaded category.
videos = category['videos']



# In[25]:

# Download each video's JSON and pair it with a file name built from its id.
stevedata = []
for v in videos:
    r = requests.get(v)
    j = r.json()
    stevedata.append(('%s.json' % j['id'], j))


# In[27]:

save_json_files(cfg, stevedata)


# In[30]:

# Parse the saved talks page; the context manager closes the file handle
# instead of leaking it.
with open('talks.html') as talks_file:
    soup = BeautifulSoup(talks_file)


# In[38]:

proposals_soup = soup.find_all('div', class_='proposal_list_summary')


# In[41]:

# First proposal summary, kept for interactive inspection.
psoup = proposals_soup[0]
示例#6
0
文件: cmdline.py 项目: jethar/steve
def pull(cfg, ctx, quiet, apikey):
    """Pulls data from a richard instance.

    Finds the category whose title matches the ``category`` config value,
    retrieves each of its videos through the REST API and hands the
    results to ``save_json_files``.

    :param cfg: project configuration object
    :param ctx: not used by this command
    :param quiet: when true, the version banner is not printed
    :param apikey: api key from the command line; falls back to the
        ``api_key`` option of the ``project`` config section when empty

    :raises click.ClickException: if the api key or another required
        setting is missing, or if the category does not exist
    """
    if not quiet:
        click.echo(VERSION)

    username = get_from_config(cfg, 'username')
    api_url = get_from_config(cfg, 'api_url')
    cat_title = get_from_config(cfg, 'category')

    # Command line api_key overrides config-set api_key
    if not apikey:
        try:
            apikey = cfg.get('project', 'api_key')
        except ConfigParser.NoOptionError:
            pass
    if not apikey:
        raise click.ClickException(
            u'Specify an api key either in {0}, on command line, '
            u'or in API_KEY file.'.format(get_project_config_file_name())
        )

    if not username or not api_url or not cat_title or not apikey:
        # Name every setting the condition checks, category included.
        raise click.ClickException(
            u'Missing username, api_url, category or api_key.')

    api = steve.restapi.API(api_url)

    all_categories = steve.restapi.get_content(
        api.category.get(username=username, api_key=apikey,
                         limit=0))
    cat = [cat_item for cat_item in all_categories['objects']
           if cat_item['title'] == cat_title]

    if not cat:
        raise click.ClickException(u'Category "{0}" does not exist.'.format(cat_title))

    # Get the category from the list of 1.
    cat = cat[0]

    click.echo('Retrieved category.')

    data = []

    for counter, video_url in enumerate(cat['videos']):
        video_id = get_video_id(video_url)

        video_data = steve.restapi.get_content(
            api.video(video_id).get(username=username,
                                    api_key=apikey))

        click.echo('Working on "{0}"'.format(video_data['slug']))

        # Nix some tastypie bits from the data.
        for bad_key in ('resource_uri',):
            if bad_key in video_data:
                del video_data[bad_key]

        # Add id.
        video_data['id'] = video_id

        # Zero-pad the counter so the file name has no embedded spaces
        # (a bare "4d" pads with blanks) and sorts in pull order.
        fn = 'json/{0:04d}_{1}.json'.format(counter, video_data['slug'])
        data.append((fn, video_data))

    click.echo('Saving files....')
    save_json_files(cfg, data)