def _populate_bliptv_data(rss, metadata):
    """Fill ``metadata`` from a blip.tv video RSS document.

    ``rss`` is the raw XML text of the feed.  The thumbnail URL, title,
    author, description and tags of the feed's item are read out and
    stored on ``metadata`` in place; nothing is returned.  Tags come from
    the comma-separated ``media:keywords`` element, with surrounding
    whitespace removed and empty entries dropped.

    >>> rss = '''<?xml version="1.0" ?>
    ... <rss version="2.0"
    ...      xmlns:media="http://search.yahoo.com/mrss/"
    ...      xmlns:blip="http://blip.tv/dtd/blip/1.0">
    ...   <channel>
    ...     <item>
    ...       <title>
    ...         Random Video
    ...       </title>
    ...       <blip:user>someuser</blip:user>
    ...       <blip:puredescription>
    ...         This is a random description.
    ...       </blip:puredescription>
    ...       <media:keywords>abc, def</media:keywords>
    ...       <media:thumbnail url="http://someurl.com/somefile.jpg" />
    ...     </item>
    ...   </channel>
    ... </rss>
    ... '''
    >>> metadata = VideoMetadata()
    >>> _populate_bliptv_data(rss, metadata)
    >>> metadata.title
    u'Random Video'
    >>> metadata.description
    u'This is a random description.'
    >>> metadata.tags
    set([u'abc', u'def'])
    >>> metadata.thumbnail_url
    u'http://someurl.com/somefile.jpg'
    >>> metadata.author
    u'someuser'

    """
    dom = minidom.parseString(rss)

    # Scalar fields map one-to-one onto elements of the feed item.
    metadata.thumbnail_url = xpath_attr(
        dom, u'rss/channel/item/media:thumbnail', 'url')
    metadata.title = xpath_text(dom, u'rss/channel/item/title')
    metadata.author = xpath_text(dom, u'rss/channel/item/blip:user')
    metadata.description = xpath_text(
        dom, u'rss/channel/item/blip:puredescription')

    # Keywords arrive as one comma-separated string; a missing element is
    # treated like an empty string so the result is always a set.
    raw_keywords = xpath_text(dom, u'rss/channel/item/media:keywords')
    if not raw_keywords:
        raw_keywords = ''

    collected = set()
    for piece in raw_keywords.split(','):
        cleaned = piece.strip()
        if cleaned:
            collected.add(cleaned)
    metadata.tags = collected
def _populate_google_data(rss, metadata):
    """Parse google video rss and pull out the metadata information.

    ``rss`` is the raw XML text of the feed; ``metadata`` is updated in
    place and nothing is returned.

    * ``thumbnail_url``, ``title`` and ``author`` are copied from the
      feed item.
    * ``duration`` is set (as a float) only when the ``duration``
      attribute of ``media:content`` is present, non-blank and numeric;
      otherwise it is left untouched.
    * ``media:description`` is split at the first ``Keywords:`` marker.
      The text before it (stripped) becomes ``description``; the
      whitespace-separated words after it become ``tags``.
    * ``description`` is ``None`` when the feed has no description text,
      and ``tags`` is ``None`` when there is no non-empty keyword section.

    >>> rss = '''<?xml version="1.0" ?>
    ... <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/">
    ...   <channel>
    ...     <title>
    ...       Google Video - The Big Experiment &amp; Rocky
    ...     </title>
    ...     <link>
    ...       http://video.google.com/videoplay?docid=-274981837129821058
    ...     </link>
    ...     <item>
    ...       <author>
    ...         Jon Doe
    ...       </author>
    ...       <media:group>
    ...         <media:title>
    ...           The Big Experiment &amp; Rocky
    ...         </media:title>
    ...         <media:description>
    ...           hello world
    ...
    ...           Keywords: eepybird eepy bird
    ...         </media:description>
    ...         <media:thumbnail url="http://video.google.com/somepath.jpg" width="320"/>
    ...         <media:content duration="23" />
    ...       </media:group>
    ...     </item>
    ...   </channel>
    ... </rss>
    ... '''
    >>> metadata = VideoMetadata()
    >>> _populate_google_data(rss, metadata)
    >>> metadata.title
    u'The Big Experiment & Rocky'
    >>> metadata.description
    u'hello world'
    >>> metadata.tags
    set([u'eepybird', u'bird', u'eepy'])
    >>> metadata.thumbnail_url
    u'http://video.google.com/somepath.jpg'
    >>> metadata.author
    u'Jon Doe'
    >>> metadata.duration
    23.0

    """
    doc = minidom.parseString(rss)

    metadata.thumbnail_url = xpath_attr(
        doc, u'rss/channel/item/media:group/media:thumbnail', 'url')
    metadata.title = xpath_text(
        doc, u'rss/channel/item/media:group/media:title')
    metadata.author = xpath_text(doc, u'rss/channel/item/author')

    duration = xpath_attr(
        doc, u'rss/channel/item/media:group/media:content', 'duration')
    if duration is not None and duration.strip() != '':
        try:
            metadata.duration = float(duration)
        except ValueError:
            # Best effort: a non-numeric duration attribute is ignored and
            # metadata.duration keeps whatever value it already had.  Only
            # ValueError is caught so that KeyboardInterrupt/SystemExit and
            # genuine programming errors are not silently swallowed.
            pass

    text = xpath_text(
        doc, u'rss/channel/item/media:group/media:description')
    description = None
    tags = None
    if text:
        # The description may embed its keywords after a "Keywords:"
        # marker.  partition() returns the whole text as `before` when the
        # marker is absent, so the description is handled uniformly.
        before, marker, keywordblurb = text.partition('Keywords:')
        if marker and keywordblurb:
            # split() with no argument breaks on any whitespace run, so
            # keywords separated by newlines or tabs are not glued into a
            # single tag, and no empty entries are produced.
            tags = set(keywordblurb.split())
        description = before.strip()

    metadata.description = description
    metadata.tags = tags