示例#1
0
def _populate_bliptv_data(rss, metadata):
    """Fill *metadata* from the first-level item fields of a blip.tv RSS feed.

    The RSS string is parsed with ``minidom`` and the following attributes
    are assigned on *metadata*: ``thumbnail_url`` (the ``url`` attribute of
    ``media:thumbnail``), ``title``, ``author`` (``blip:user``),
    ``description`` (``blip:puredescription``) and ``tags``.

    ``tags`` is always a set: the comma-separated ``media:keywords`` text,
    with surrounding whitespace removed from each entry and empty entries
    dropped.  A missing or empty keywords element gives an empty set.

    Nothing is returned; *metadata* is modified in place.

      >>> rss = '''<?xml version="1.0" ?>
      ... <rss version="2.0"
      ...      xmlns:media="http://search.yahoo.com/mrss/"
      ...      xmlns:blip="http://blip.tv/dtd/blip/1.0">
      ... <channel>
      ...     <item>
      ...       <title>
      ...         Random Video
      ...       </title>
      ...       <blip:user>someuser</blip:user>
      ...       <blip:puredescription>
      ...         This is a random description.
      ...       </blip:puredescription>
      ...       <media:keywords>abc, def</media:keywords>
      ...       <media:thumbnail url="http://someurl.com/somefile.jpg" />
      ...     </item>
      ...   </channel>
      ... </rss>
      ... '''

      >>> metadata = VideoMetadata()
      >>> _populate_bliptv_data(rss, metadata)

      >>> metadata.title
      u'Random Video'
      >>> metadata.description
      u'This is a random description.'
      >>> metadata.tags
      set([u'abc', u'def'])
      >>> metadata.thumbnail_url
      u'http://someurl.com/somefile.jpg'
      >>> metadata.author
      u'someuser'

    """
    # Plain-text fields: (attribute on metadata, path of the element to read).
    text_fields = (
        ('title', u'rss/channel/item/title'),
        ('author', u'rss/channel/item/blip:user'),
        ('description', u'rss/channel/item/blip:puredescription'),
    )

    doc = minidom.parseString(rss)

    thumbnail = xpath_attr(doc, u'rss/channel/item/media:thumbnail', 'url')
    metadata.thumbnail_url = thumbnail

    for attr_name, path in text_fields:
        setattr(metadata, attr_name, xpath_text(doc, path))

    # Fall back to an empty string so a missing keywords element simply
    # produces no tags.
    raw_keywords = xpath_text(doc, u'rss/channel/item/media:keywords')
    if not raw_keywords:
        raw_keywords = ''

    tags = set()
    for piece in raw_keywords.split(','):
        tag = piece.strip()
        if tag:
            tags.add(tag)
    metadata.tags = tags
示例#2
0
def _populate_google_data(rss, metadata):
    """Parse google video rss and pull out the metadata information.

    The RSS string is parsed with ``minidom`` and these attributes are
    assigned on *metadata* (nothing is returned):

    * ``thumbnail_url`` -- ``url`` attribute of ``media:thumbnail``.
    * ``title`` -- text of ``media:title``.
    * ``author`` -- text of the item's ``author`` element.
    * ``duration`` -- ``duration`` attribute of ``media:content`` converted
      to ``float``.  Only assigned when the attribute is present, non-blank
      and numeric; otherwise ``metadata.duration`` is left untouched.
    * ``description`` -- text of ``media:description`` up to (not including)
      the first ``Keywords:`` marker, stripped of surrounding whitespace.
      ``None`` when the description text is missing or empty.
    * ``tags`` -- set of whitespace-separated words following the
      ``Keywords:`` marker.  ``None`` when there is no description text, no
      marker, or nothing at all after the marker.

      >>> rss = '''<?xml version="1.0" ?>
      ... <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/">
      ... <channel>
      ...     <title>
      ...       Google Video - The Big Experiment &amp; Rocky
      ...     </title>
      ...     <link>
      ...       http://video.google.com/videoplay?docid=-274981837129821058
      ...     </link>
      ...     <item>
      ...       <author>
      ...         Jon Doe
      ...       </author>
      ...       <media:group>
      ...         <media:title>
      ...           The Big Experiment &amp; Rocky
      ...         </media:title>
      ...         <media:description>
      ...           hello world
      ...
      ...           Keywords:  eepybird eepy bird
      ...         </media:description>
      ...         <media:thumbnail url="http://video.google.com/somepath.jpg" width="320"/>
      ...         <media:content duration="23" />
      ...       </media:group>
      ...     </item>
      ...   </channel>
      ... </rss>
      ... '''

      >>> metadata = VideoMetadata()
      >>> _populate_google_data(rss, metadata)

      >>> metadata.title
      u'The Big Experiment & Rocky'
      >>> metadata.description
      u'hello world'
      >>> metadata.tags
      set([u'eepybird', u'bird', u'eepy'])
      >>> metadata.thumbnail_url
      u'http://video.google.com/somepath.jpg'
      >>> metadata.author
      u'Jon Doe'
      >>> metadata.duration
      23.0

    """
    doc = minidom.parseString(rss)
    metadata.thumbnail_url = xpath_attr(
        doc, u'rss/channel/item/media:group/media:thumbnail', 'url')
    metadata.title = xpath_text(
        doc, u'rss/channel/item/media:group/media:title')
    metadata.author = xpath_text(
        doc, u'rss/channel/item/author')

    duration = xpath_attr(
        doc, u'rss/channel/item/media:group/media:content', 'duration')
    if duration is not None and duration.strip() != '':
        try:
            metadata.duration = float(duration)
        except ValueError:
            # Not a number: deliberately best-effort, so leave
            # metadata.duration untouched.  Only ValueError is caught so
            # that KeyboardInterrupt/SystemExit are never swallowed.
            pass

    text = xpath_text(
        doc, u'rss/channel/item/media:group/media:description')
    description = None
    tags = None
    if text:
        # Split at the first 'Keywords:' marker.  When the marker is absent
        # ``before`` is the whole text and ``after`` is empty.
        before, _marker, after = text.partition('Keywords:')
        if after:
            # split() with no argument separates on any run of whitespace,
            # so keywords spread over several lines (or separated by tabs)
            # become individual tags instead of one tag with embedded
            # newlines.
            tags = set(after.split())
        description = before.strip()

    metadata.description = description
    metadata.tags = tags