def _get_entries(self):
    ret = []
    chan_tag = self.__rss_xml.find('channel')
    # Fetch the HTML content for every item link concurrently.
    with ThreadPool(max_workers=20) as pool:
        item_contents = pool.map(
            self.__get_html_content,
            [item_tag.find('link').text for item_tag in chan_tag.findall('item')]
        )
    for item_tag, content in zip(chan_tag.findall('item'), item_contents):
        e = FeedEntry()
        e.load_extension('dc')
        item_details = self.__get_xml_dict(
            item_tag,
            ['title', 'link', 'guid', 'pubDate', '{%s}creator' % self.__DC_NS]
        )
        e.title(item_details['title'])
        e.link(href=item_details['link'], rel='alternate')
        e.guid(item_details['guid'])
        e.dc.dc_creator(item_details['{%s}creator' % self.__DC_NS])
        e.pubdate(dateparser.parse(item_details['pubDate']))
        e.content('<p>%s</p>' % content, type='CDATA')
        ret.append(e)
    return ret
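
# The helper below is NOT part of the original source: it is a minimal sketch
# of what the __get_xml_dict helper called above is assumed to do, namely map
# each requested tag name to the text of the matching child element.
def __get_xml_dict(self, parent_tag, tag_names):
    # find() resolves namespaced names such as '{%s}creator' as well.
    return {name: parent_tag.find(name).text for name in tag_names}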
def get_feed_entry(media_file, basedir, baselink, image_url):
    '''
    Generate a feed entry based on ID3 data

    TODO: Deal with files with no ID3 data
    '''
    fe = FeedEntry()
    fe.load_extension('podcast')
    file_path = '{}/{}'.format(basedir, media_file)
    media_info = EasyID3(file_path)
    media_length_s = mutagen.File(file_path).info.length
    media_length = datetime.timedelta(seconds=round(media_length_s))
    fe.title(media_info['title'][0])
    fe.description('Part {} of {}'.format(media_info['tracknumber'][0],
                                          media_info['album'][0]))
    fe.podcast.itunes_duration(media_length)
    url = '{}/{}'.format(baselink, urllib.pathname2url(media_file))
    fe.id(url)
    fe.link(href=url, rel='alternate')
    fe.pubdate('{} +0000'.format(datetime.datetime.utcfromtimestamp(os.path.getmtime(file_path))))
    fe.enclosure(url, str(os.path.getsize(file_path)), mimetypes.guess_type(file_path)[0])
    # Found no need for this at this time since all podcasts have the same feed image
    # fe.podcast.itunes_image(image_url)
    return fe
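
# Hypothetical usage sketch (not part of the original source): assemble a
# complete podcast feed from every .mp3 in a directory via get_feed_entry()
# above. media_dir and base_link are placeholder parameters.
def build_feed_example(media_dir, base_link):
    import os
    from feedgen.feed import FeedGenerator

    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title('Example audiobook feed')
    fg.link(href=base_link, rel='alternate')
    fg.description('Feed generated from local media files')
    for name in sorted(os.listdir(media_dir)):
        if name.endswith('.mp3'):
            fg.add_entry(get_feed_entry(name, media_dir, base_link, image_url=None))
    # Serialize the finished feed to disk.
    fg.rss_file('feed.xml')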
def _parse_html(self, html):
    soup = BeautifulSoup(html, 'html.parser')
    ret = []
    # Turn every <li> in the document into a feed entry.
    for i, li in enumerate(soup.find_all('li')):
        e = FeedEntry()
        e.load_extension('dc')
        e.title('title: <p> #%d' % i)
        e.link(href='http://%d.%s' % (i, self.__URL), rel='alternate')
        e.dc.dc_creator('author')
        e.description('description: <p> #%d' % i)
        e.content('content: %s' % li.text, type='CDATA')
        # Stagger pubdates so the entries keep a stable ordering.
        e.pubdate(datetime.now(pytz.utc) + timedelta(minutes=i))
        ret.append(e)
    return ret
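
# Hypothetical smoke test (not part of the original source): run _parse_html
# on a tiny fragment and inspect the resulting entries. `scraper` stands in
# for an instance of the surrounding class, whose name is not shown in this
# excerpt.
def smoke_test_parse_html(scraper):
    html = '<ul><li>first item</li><li>second item</li></ul>'
    for entry in scraper._parse_html(html):
        # feedgen setters double as getters when called without arguments.
        print(entry.title(), entry.pubdate())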
def _get_entries(self):
    # Pull the latest uploads, then batch-fetch full details for each video.
    playlist = self.__api.playlistItems().list(
        playlistId=self.__uploads_id,
        part="contentDetails",
        maxResults=20
    ).execute()
    videos = self.__api.videos().list(
        id=','.join(item['contentDetails']['videoId'] for item in playlist['items']),
        part='snippet,contentDetails'
    ).execute()
    ret = []
    for item in videos['items']:
        snip = item['snippet']
        duration = self.__parse_duration(
            aniso8601.parse_duration(item['contentDetails']['duration']).seconds
        )
        title = '%s [%s]' % (snip['title'], duration)
        e = FeedEntry()
        e.load_extension('dc')
        e.dc.dc_creator('none')
        e.title(title)
        e.link(href=self.__VIDEO_URL % item['id'], rel='alternate')
        e.description(title)
        e.pubdate(aniso8601.parse_datetime(snip['publishedAt']))
        content_args = {
            'image': snip['thumbnails']['high']['url'],
            'content': self.__parse_content(snip)
            # TODO: some comments i think?
            # 'comments':
        }
        e.content(self.__CONTENT % content_args, type='CDATA')
        ret.append(e)
    return ret
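
# The helper below is NOT part of the original source: a plausible sketch of
# the __parse_duration helper assumed above, which takes a duration in seconds
# and formats it as H:MM:SS (or M:SS for videos under an hour) for the title.
@staticmethod
def __parse_duration(seconds):
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return '%d:%02d:%02d' % (hours, minutes, secs)
    return '%d:%02d' % (minutes, secs)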