Example #1
File: cmdline.py  Project: B-Rich/steve
def scrapevideo_cmd(parser, parsed, args):
    if not parsed.quiet:
        parser.print_byline()

    video_url = parsed.video[0]
    data = scrapevideo(video_url, parsed.richard, 'object')
    if parsed.save:
        cfg = get_project_config()

        projectpath = cfg.get('project', 'projectpath')
        jsonpath = os.path.join(projectpath, 'json')

        if not os.path.exists(jsonpath):
            os.makedirs(jsonpath)

        fn = 'json/' + generate_filename(data['title']) + '.json'

        if os.path.exists(fn):
            err('It already exists!')
            return 1

        with open(fn, 'w') as fp:
            fp.write(convert_to_json(data))
        print 'Saved as {0}'.format(fn)
    else:
        print convert_to_json(data)
    return 0
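Note that the save branch above computes jsonpath under the configured project path but then writes to the relative path 'json/...', so the two only line up when the command is run from the project root. A minimal sketch of how the body of the `if parsed.save:` branch could join against jsonpath instead (an illustration, not the project's code, reusing the names defined above):

        # Sketch only: reuses jsonpath, data and the helpers from the example
        # above; joining against jsonpath keeps the file under the configured
        # project directory regardless of the current working directory.
        fn = os.path.join(jsonpath, generate_filename(data['title']) + '.json')

        if os.path.exists(fn):
            err('{0} already exists!'.format(fn))
            return 1

        with open(fn, 'w') as fp:
            fp.write(convert_to_json(data))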
Example #2
    def get_scrapevideo_metadata(self, host_url):
        """ scrapes metadata from the host_url of the episode

        This is a wrapper around steve's scrapevideo. It preps
        the host_url if necessary, and repeatedly calls scrapevideo
        until no error is raised

        :arg host_url: host URL of the episode's video to scrape
        :returns: dict of metadata, or {}

        """

        if host_url is None or host_url == '':
            # there's nothing to scrape
            return {}

        # FIXME: error handling kinda crazy here
        while True:
            # keep trying until it stops raising errors
            try:
                scraped_meta = scrapevideo(host_url)
                break
            except KeyError as e:
                print "KeyError", e

        if self.options.verbose:
            pprint.pprint(scraped_meta)

        return scraped_meta
Example #3
    def get_scrapevideo_metadata(self, host_url):
        """ scrapes metadata from the host_url of the episode

        This is a wrapper around steve's scrapevideo. It preps
        the host_url if necessary, and repeatedly calls scrapevideo
        until no error is raised

        :arg host_url: host URL of the episode's video to scrape
        :returns: dict of metadata, or {}

        """

        if host_url is None or host_url == "":
            # there's nothing to scrape
            return {}

        # FIXME: error handling kinda crazy here
        while True:
            # keep trying until it stops raising errors
            try:
                scraped_meta = scrapevideo(host_url)
                break
            except KeyError as e:
                print "KeyError", e
            except requests.exceptions.Timeout as e:
                # requests.exceptions.Timeout: HTTPConnectionPool(host='www.youtube.com', port=80): Request timed out. (timeout=3)
                print "requests.exceptions.Timeout:", e
                print "looping..."

        if self.options.verbose:
            print "scraped_meta"
            pprint.pprint(scraped_meta)

        return scraped_meta
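The bare `while True` loop in this wrapper (flagged by the author's own FIXME) spins forever if the scrape never succeeds. A sketch of the same idea with a bounded number of attempts and a short pause between them (illustrative only; scrape_with_retries is a hypothetical name, and scrapevideo and requests are assumed to be imported as in the example above):

import time

def scrape_with_retries(host_url, attempts=5, delay=2):
    # Sketch only: retry steve's scrapevideo a fixed number of times,
    # sleeping between attempts, and give up with {} instead of looping forever.
    for attempt in range(attempts):
        try:
            return scrapevideo(host_url)
        except (KeyError, requests.exceptions.Timeout) as e:
            print("attempt %d failed: %r" % (attempt + 1, e))
            time.sleep(delay)
    return {}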
Example #4
    def process_ep(self, ep):
        if self.options.verbose: print ep.id, ep.name

        meta = scrapevideo(ep.host_url)

        # print ep.host_url
        # print meta['description']
        description = html_to_markdown(meta['description'])
        ep.description = description

        title = html_to_markdown(meta['title'])
        if ep.name != title:
            print ep.host_url
            print "veyepar:\t%s" % (ep.name, )
            print "  vimeo:\t%s" % (title, )
            print

        ep.save()
        ret = None
        return ret
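This version indexes meta['description'] and meta['title'] directly, which raises KeyError when the scrape comes back incomplete (the very situation the wrapper in Example #2 retries on). A defensive variant of the update step, shown only as a sketch and reusing meta, ep and html_to_markdown from the example above, could fall back to empty strings:

        # Sketch only: tolerate a scrape result that is missing keys
        # instead of letting KeyError propagate.
        description = html_to_markdown(meta.get('description', ''))
        if description:
            ep.description = description

        title = html_to_markdown(meta.get('title', ''))
        if title and ep.name != title:
            print(ep.host_url)
            print("veyepar:\t%s" % (ep.name,))
            print("  vimeo:\t%s" % (title,))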
Example #5
    def get_scrapevideo_metadata(self, host_url):
        """ scrapes metadata from the host_url of the episode

        This is a wrapper around steve's scrapevideo. It preps
        the host_url if necessary, and repeatedly calls scrapevideo
        until no error is raised

        :arg host_url: host URL of the episode's video to scrape
        :returns: dict of metadata, or {}

        """

        if host_url is None or host_url == '':
            # there's nothing to scrape
            return {}

        # FIXME: error handling kinda crazy here
        while True:
            # keep trying until it stops raising errors
            try:
                scraped_meta = scrapevideo(host_url)
                break
            except KeyError as e:
                print("KeyError", e)
            except requests.exceptions.Timeout as e:
                # requests.exceptions.Timeout: HTTPConnectionPool(host='www.youtube.com', port=80): Request timed out. (timeout=3)
                print("requests.exceptions.Timeout:", e)
                print("looping...")

        if self.options.verbose:
            print("scraped_meta")
            pprint.pprint(scraped_meta)

        return scraped_meta
Example #6
    def process_ep(self, ep):
        if self.options.verbose: print "post_to_richard", ep.id, ep.name

        ### remove some day....
        if ep.host_url.startswith("http://gdata.youtube.com/feeds/api/videos/"):
            yt_id = ep.host_url.split('/')[-1]
            ep.host_url = "http://youtube.com/watch?v=%s" % (yt_id,)

        # get the metadata from youtube
        # like thumb url and video embed code
        while True:
            # keep trying until it stops raising errors
            try:
                yt_meta = scrapevideo(ep.host_url)
                break
            except KeyError as e:
                print "KeyError", e

        if self.options.verbose:
            pprint.pprint(yt_meta)

        # speakers = [] if ep.authors is None else ep.authors.split(',')
        speakers = ep.authors.split(',') if ep.authors else []

        tags = ep.tags.split(',')
        # remove blacklisted tags, tags with a / in them, and empty tags,
        # and strip surrounding spaces
        tags = [t.strip() for t in tags if t not in [
             u'enthought',
             u'scipy_2012',
             u'Introductory/Intermediate',
             ]
             and '/' not in t
             and t]

        host = pw.richard[self.options.host_user]

        # Create an api object with the target api root url.
        endpoint = 'http://%(host)s/api/v1/' % host
        api = slumber.API(endpoint)
        ### api = slumber.API(endpoint, session=requests.session(
        ###   params={"username": host['user'], "api_key": host['api_key']}))

        # make sure the category exists.
        # This seems like a terrible way of doing this,
        # but I need to get something working today!!!
        # I am going to regret this later.
        # To the future me: Sorry.


        """
        if self.options.verbose: print "Show slug:", ep.show.slug, ep.show.client.name
        cats = api.category.get(limit=0)
        found = False
        for cat in cats['objects']:
            if self.options.verbose: print cat['id'], cat['slug'], cat['name']
            if cat['name']  ==  ep.show.name:
                found = True
                if self.options.verbose: print "found"
                break

        if not found:
            # The category doesn't exist yet, so create it

            if self.options.verbose:  print "creating..."
            cat_data = {
                'kind': 1,
                'name': ep.show.name,
                # 'name': ep.show.client.name,
                'title': ep.show.name,
                # 'title': ep.show.client.name,
                'description': '',
                'url': '',
                'whiteboard': '',
                # I think start_date should be blank, or .today()
                # 'start_date': '2012-07-16',
                # 'slug': ep.show.client.slug
                # 'slug': ep.show.slug
            }
            try:
                # cat = api.category.post(cat_data, 
                #    username=host['user'], api_key=host['api_key'])
                # if self.options.verbose:  print "created", cat
                pass
            except Exception as exc:
                # TODO: OMG gross.
                if exc.content.startswith('\n<!DOCTYPE html>'):
                    error_lines = [line for line in exc.content.splitlines()
                            if 'exception_value' in line]
                    for line in error_lines:
                         print line
                else:
                    print "exc.content:", exc.content.__repr__()

                raise

        # cat is now the category we want to use
        # either it was existing, or was just added.
        # category_key = cat['title']
        """

        # category_key = 'PyCon DE 2012'
        category_key = 'ChiPy'

        description = linebreaks(urlize(force_escape(ep.description)))
        slug = ep.slug.replace("_", "-").lower()

        # Let's populate a video object and push it.
        video_data = {
            'state': 1,  # 1=live, 2=draft
            'title': ep.name,
            'category': category_key,
            'summary': description,
            # 'slug': slug,
            'source_url': ep.host_url,
            'copyright_text': ep.license,
            'tags': tags,
            'speakers': speakers,
            'recorded': ep.start.isoformat(),
            'language': 'English',
            # 'language': 'German',
            'whiteboard': u'',
            'quality_notes': '',
            'description': u'',
            'thumbnail_url': yt_meta['thumbnail_url'],
            'video_ogv_url': ep.archive_ogv_url,
            'video_ogv_length': None,
            'video_mp4_url': ep.archive_mp4_url,
            'video_mp4_download_only': False,
            'video_mp4_length': None,
            'video_webm_url': u'',
            'video_webm_length': None,
            'video_flv_url': u'',
            'video_flv_length': None,
            'embed': yt_meta.get('object_embed_code', ''),
        }
        if self.options.verbose: pprint.pprint(video_data)

        try:
            if ep.public_url:
                # update
                vid_id = ep.public_url.split('/video/')[1].split('/')[0]
                updated = api.video(vid_id).put(video_data,
                    username=host['user'], api_key=host['api_key'])
                ret = updated
            else:
                # add
                vid = api.video.post(video_data,
                        username=host['user'], api_key=host['api_key'])
                # set to draft
                updated = api.video(vid['id']).put({
                    'state': 2,
                    'category': vid['category'],
                    'title': vid['title'],
                    },
                    username=host['user'], api_key=host['api_key'])

                self.pvo_url = "http://%s/video/%s/%s" % (
                        host['host'], vid['id'], vid['slug'])
                if self.options.verbose: print self.pvo_url
                print self.pvo_url
                ep.public_url = self.pvo_url
                ret = self.pvo_url

        except Exception as exc:
            print "exc:", exc
            ret = False
            import code
            code.interact(local=locals())

            # TODO: OMG gross.
            if exc.content.startswith('\n<!DOCTYPE html>'):
                error_lines = [line for line in exc.content.splitlines()
                        if 'exception_value' in line]
                for line in error_lines:
                    print line
            else:
                print "exc.content:", exc.content

            raise

        ep.save()

        return ret
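The create-versus-update decision near the end of this example is easy to lose among the surrounding code. Condensed into a sketch (push_video is a hypothetical helper name; the slumber calls and the username/api_key parameters are the same ones the example itself uses):

import slumber

def push_video(host, video_data, public_url=None):
    # Sketch only: post a new video to a richard instance via slumber,
    # or update the existing one if its public URL is already known.
    api = slumber.API('http://%(host)s/api/v1/' % host)
    creds = {'username': host['user'], 'api_key': host['api_key']}

    if public_url:
        # public_url looks like http://<host>/video/<id>/<slug>
        vid_id = public_url.split('/video/')[1].split('/')[0]
        return api.video(vid_id).put(video_data, **creds)

    vid = api.video.post(video_data, **creds)
    # the example flips a freshly created video back to draft (state=2)
    api.video(vid['id']).put({'state': 2,
                              'category': vid['category'],
                              'title': vid['title']}, **creds)
    return "http://%s/video/%s/%s" % (host['host'], vid['id'], vid['slug'])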