def test_mandatoryValues(self):
    """Verify that RSS generation fails when one mandatory field is unset.

    A fresh Podcast is built once per mandatory property, with every
    property except that one copied from the test fixture. Calling
    ``_create_rss`` on it must then raise ValueError.
    """
    # Label used in the failure message -> attribute name (identical on the
    # Podcast and on this test case).
    attribute_for = {
        "description": "description",
        "title": "name",
        "link": "website",
        "explicit": "explicit",
    }
    for omitted in set(attribute_for):
        fg = Podcast()
        for label in ("description", "title", "link", "explicit"):
            if label == omitted:
                continue
            attribute = attribute_for[label]
            setattr(fg, attribute, getattr(self, attribute))
        try:
            self.assertRaises(ValueError, fg._create_rss)
        except AssertionError as error:
            # Re-raise naming the omitted property so the failure is traceable.
            raise_from(
                AssertionError("The test failed for %s" % omitted), error)
def test_constructor(self):
    """Build the feed via constructor keywords and re-run the base checks."""
    cloud_settings = (
        self.cloudDomain,
        self.cloudPort,
        self.cloudPath,
        self.cloudRegisterProcedure,
        self.cloudProtocol,
    )
    constructor_arguments = dict(
        name=self.name,
        website=self.website,
        description=self.description,
        subtitle=self.subtitle,
        language=self.language,
        cloud=cloud_settings,
        pubsubhubbub=self.pubsubhubbub,
        copyright=self.copyright,
        authors=[self.author],
        skip_days=self.skip_days,
        skip_hours=self.skip_hours,
        web_master=self.web_master,
        feed_url=self.feed_url,
        explicit=self.explicit,
        image=self.image,
        owner=self.owner,
        complete=self.complete,
        new_feed_url=self.new_feed_url,
        xslt=self.xslt,
    )
    # Replace the feed prepared by setUp with one built by the constructor.
    self.fg = Podcast(**constructor_arguments)

    # The base test asserts that every field was actually set.
    self.test_baseFeed()
def album(self):
    """Scrape the album page at ``self.url`` and write its RSS feed to disk.

    Feed-level metadata is read from the album HTML. Every id listed in the
    page's ``sound_ids`` attribute is passed to ``self.detail`` together with
    the date text shown for that sound (``detail`` is defined elsewhere and
    presumably adds the episode -- confirm). The feed is written to
    ``ximalaya/<album_id>.rss``.
    """
    page = requests.get(self.url, headers=self.header)
    soup = BeautifulSoup(page.content, "lxml")

    # Initialise the feed-level metadata.
    self.podcast = Podcast()
    self.podcast.name = soup.find('div', 'detailContent_title').get_text()
    self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
    self.podcast.website = self.url
    self.podcast.copyright = 'cc-by'
    self.podcast.description = soup.find('div', 'mid_intro').get_text()
    # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
    self.podcast.language = 'zh-cn'
    # The part after '!' is dropped from the cover URL.
    self.podcast.image = soup.find('a', 'albumface180').find('img').get('src').split('!')[0]
    self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
    self.podcast.category = Category('Technology', 'Podcasting')
    self.podcast.explicit = False
    self.podcast.complete = False
    self.podcast.owner = Person("forecho", '*****@*****.**')

    sound_ids = soup.find('div', class_='personal_body').get('sound_ids').split(',')
    for sound_id in sound_ids:
        date = soup.find('li', sound_id=sound_id).find('div', class_='operate').get_text().strip()
        self.detail(sound_id, date)

    # Write the generated feed.
    self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
def album(self):
    """Fetch album info and programme list from the API and write the feed.

    One episode is added per entry of the programme list. The feed is
    written to ``qingting/<album_id>.rss``.
    """
    album_info_content = requests.get(self.album_info_api).content
    album_info_data = json.loads(album_info_content)
    album_list_content = requests.get(self.album_list_api).content
    album_list_data = json.loads(album_list_content)

    # The same cover is used for the feed and for every episode.
    cover = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')

    self.podcast = Podcast()
    self.podcast.name = album_info_data['data']['title']
    self.podcast.authors.append(Person("Powered by maijver", '*****@*****.**'))
    self.podcast.website = self.url
    self.podcast.copyright = 'cc-by'
    self.podcast.description = album_info_data['data']['description']
    # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
    self.podcast.language = 'zh-cn'
    self.podcast.image = cover
    self.podcast.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
    self.podcast.category = Category('Technology', 'Podcasting')
    self.podcast.explicit = False
    self.podcast.complete = False
    self.podcast.owner = Person("maijver", '*****@*****.**')

    for each in album_list_data['data']:
        episode = self.podcast.add_episode()
        episode.id = str(each['id'])
        episode.title = each['title']
        print(self.podcast.name + '=====' + each['title'])
        episode.image = cover
        episode.summary = each['title']
        episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(each['update_time'])
        # Only the first entry of bitrates_url is used for the media file.
        episode.media = Media(
            "http://od.qingting.fm/{}".format(each['mediainfo']['bitrates_url'][0]['file_path']),
            each['duration'])

    self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)
def main():
    """Print an RSS feed built from the episode list in ``thebugle.json``.

    Each JSON entry supplies ``id``, ``title``, ``file`` and a ``date`` in
    ``YYYY-MM-DD`` form. The media details are obtained from the server
    under ``MEDIA_BASE_URL``.
    """
    with open('thebugle.json') as source:
        entries = json.load(source)

    feed = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for entry in entries:
        heading = f"{entry['id']}: {entry['title']}"
        item = feed.add_episode(Episode(title=heading))
        item.media = Media.create_from_server_response(
            f"{MEDIA_BASE_URL}/{entry['file']}")
        item.media.fetch_duration()

        # Publication time is midnight UTC on the listed day.
        parts = entry['date'].split('-')
        item.publication_date = datetime(
            int(parts[0]), int(parts[1]), int(parts[2]),
            0, 0, 0, tzinfo=pytz.utc)

    print(feed.rss_str())
def generate_podcast_xml(podcasts):
    """Return the RSS document for a feed holding the given episodes.

    Feed-level settings come from the ``config`` module; ``podcasts`` is
    assigned as the feed's episode list.
    """
    feed_settings = {
        'name': config.PODCAST_NAME,
        'description': config.PODCAST_DESCRIPTION,
        'website': config.PODCAST_WEBSITE,
        'explicit': config.PODCAST_CONTAINS_EXPLICIT_CONTENT,
        'withhold_from_itunes': True,
    }
    feed = Podcast(**feed_settings)
    feed.episodes = podcasts
    return feed.rss_str()
def album(self):
    """Scrape the album page, fetch every track and write the RSS feed.

    Feed metadata comes from the album HTML, the track list from
    ``self.album_list_api`` and per-track details from the
    ``/tracks/<id>.json`` endpoint. A track that fails is reported and
    skipped so the remaining tracks still reach the feed, which is written
    to ``ximalaya/<album_id>.rss``.
    """
    page = requests.get(self.album_url, headers=self.header)
    soup = BeautifulSoup(page.content, "lxml")

    # Initialise the feed-level metadata.
    self.podcast = Podcast()
    self.podcast.name = soup.find('h1', 'title').get_text()
    self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
    self.podcast.website = self.album_url
    self.podcast.copyright = 'cc-by'
    # Fall back to the album name when the page has no (non-empty) intro.
    intro = soup.find('div', 'album-intro')
    intro_text = intro.get_text() if intro else ''
    self.podcast.description = intro_text if intro_text else self.podcast.name
    # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
    self.podcast.language = 'zh-cn'
    self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
    self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
    self.podcast.category = Category('Technology', 'Podcasting')
    self.podcast.explicit = False
    self.podcast.complete = False
    self.podcast.owner = Person("forecho", '*****@*****.**')

    album_list_content = requests.get(self.album_list_api, headers=self.header).content
    album_list_data = json.loads(album_list_content.decode('utf-8'))
    tracks = album_list_data['data']['tracksAudioPlay']
    count = len(tracks)
    for each in tracks:
        try:
            detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
            response = requests.get(detail_url, headers=self.header)
            item = json.loads(response.content)

            episode = self.podcast.add_episode()
            episode.id = str(each['index'])
            episode.title = each['trackName']
            print(self.podcast.name + '=====' + each['trackName'])

            # GIF/BMP covers are replaced by the album cover.
            image = each['trackCoverPath'].split('!')[0]
            if image.endswith(('.gif', '.bmp')):
                episode.image = self.podcast.image
            else:
                episode.image = image

            if item['intro']:
                episode.summary = item['intro'].replace('\r\n', '')
            else:
                episode.summary = each['trackName']
            episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(
                item['time_until_now'], item['formatted_created_at'])
            episode.media = Media(each['src'], each['duration'])
            # Positions count down as the track index counts up.
            episode.position = count - each['index'] + 1
        except Exception as e:
            # Best effort: report the failing track and carry on.
            print('异常:', e)
            print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
            traceback.print_exc()

    # Write the generated feed.
    self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
def test_removeEntryByIndex(self):
    """Check that popping index 0 from ``episodes`` removes the only episode."""
    feed = Podcast()
    self.feedId = 'http://example.com'
    self.title = 'Some Testfeed'

    entry = feed.add_episode()
    entry.id = 'http://lernfunk.de/media/654321/1'
    entry.title = 'The Third BaseEpisode'
    assert len(feed.episodes) == 1

    # Remove through the list that the feed exposes.
    feed.episodes.pop(0)
    assert len(feed.episodes) == 0
def generate_podcast_xml(base, books):
    """Write an RSS feed listing the given audiobooks to ``skeleton/rss.xml``.

    Args:
        base: URL prefix under which the ``files/`` directory is served.
        books: File names of the audiobooks found in the local ``files/``
            directory.
    """
    from podgen import Episode, Media, Podcast

    p = Podcast()
    p.name = "AeonNeo's Audiobooks"
    p.description = "Description"
    p.website = "www.yangvincent.com"
    p.explicit = False

    # One episode per audiobook file.
    for book_name in books:
        ep = Episode()
        # Strip the extension whatever its length; a fixed [:-4] slice
        # mangles names such as "book.flac" or names without an extension.
        ep.title = os.path.splitext(book_name)[0]
        full_path = base + '/files/' + book_name
        dev_path = 'files/' + book_name
        try:
            book_size = os.path.getsize(dev_path)
        except OSError as e:
            # A missing or unreadable local file is reported; size falls back to 0.
            print(e)
            book_size = 0
        ep.media = Media(full_path, type='audio/mp4a', size=book_size)
        p.episodes.append(ep)

    # Generate the RSS file.
    p.rss_file('skeleton/rss.xml', minimize=True)
class Qingting(object):
    """Builds a podcast RSS feed for one qingting.fm channel."""

    def __init__(self, album_id):
        """Store the channel id and derive the page and API URLs from it."""
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def album(self):
        """Fetch channel info and programme list, then write the RSS feed.

        One episode is added per entry of the programme list. The feed is
        written to ``qingting/<album_id>.rss``.
        """
        album_info_content = requests.get(self.album_info_api).content
        album_info_data = json.loads(album_info_content)
        album_list_content = requests.get(self.album_list_api).content
        album_list_data = json.loads(album_list_content)

        # The same cover is used for the feed and for every episode.
        cover = album_info_data['data']['thumbs']['small_thumb'].replace('!200', '')

        self.podcast = Podcast()
        self.podcast.name = album_info_data['data']['title']
        self.podcast.authors.append(Person("Powered by maijver", '*****@*****.**'))
        self.podcast.website = self.url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = album_info_data['data']['description']
        # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
        self.podcast.language = 'zh-cn'
        self.podcast.image = cover
        self.podcast.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = self.podcast.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(episode.title)
            episode.image = cover
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            # Only the first entry of bitrates_url is used for the media file.
            episode.media = Media(
                "http://od.qingting.fm/{}".format(each['mediainfo']['bitrates_url'][0]['file_path']),
                each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse ``YYYY-MM-DD HH:MM:SS`` into a UTC-tagged datetime.

        The seconds are discarded, so the result is truncated to the minute.
        NOTE(review): the value is labelled UTC without any conversion --
        confirm that the API really reports UTC times.
        """
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return timestamp.replace(second=0, tzinfo=pytz.utc)
def main(event, context):
    """Lambda-style handler returning the "Sem Servidor" feed as RSS.

    Reads every item of the ``semservidor-dev`` DynamoDB table, turns each
    one into an episode and returns a response dict whose body is the RSS
    document.

    Args:
        event: Invocation event (unused).
        context: Invocation context (unused).

    Returns:
        dict with ``statusCode``, ``headers`` and ``body`` keys.
    """
    dynamodb = boto3.resource('dynamodb', region_name='sa-east-1')
    table = dynamodb.Table('semservidor-dev')

    # A single scan call returns at most one page of results; follow
    # LastEvaluatedKey until the whole table has been read so that no
    # episode is silently dropped.
    items = []
    scan_kwargs = {}
    while True:
        page = table.scan(**scan_kwargs)
        items.extend(page['Items'])
        if 'LastEvaluatedKey' not in page:
            break
        scan_kwargs['ExclusiveStartKey'] = page['LastEvaluatedKey']

    author = Person("Evandro Pires da Silva", "*****@*****.**")
    p = Podcast(
        name="Sem Servidor",
        description="Podcast dedicado a arquitetura serverless, com conteúdo de qualidade em português.",
        website="https://semservidor.com.br",
        explicit=False,
        copyright="2020 Evandro Pires da Silva",
        # Brazilian Portuguese is "pt-BR"; "pr-BR" is not a language code.
        language="pt-BR",
        authors=[author],
        feed_url="https://3tz8r90j0d.execute-api.sa-east-1.amazonaws.com/dev/podcasts/rss",
        category=Category("Music", "Music History"),
        owner=author,
        image="http://d30gvsirhz3ono.cloudfront.net/logo_semservidor_teste.jpg",
        web_master=Person(None, "*****@*****.**"))

    base_url = "http://d30gvsirhz3ono.cloudfront.net/"
    for item in items:
        info = item['info']
        file_path = base_url + info['arquivo']['nome']
        p.episodes.append(
            Episode(title=info['episodio'],
                    media=Media(file_path, int(info['arquivo']['tamanho'])),
                    summary=info['descricao'],
                    position=int(item['id'])))

    # Order the episodes by their explicit position before rendering.
    p.apply_episode_order()
    rss = p.rss_str()

    return {
        "statusCode": 200,
        "headers": {
            "content-type": "application/xml"
        },
        "body": rss
    }
def index():
    """Serve the "ambience" feed as XML.

    The feed is built with podgen (https://podgen.readthedocs.io/en/latest/)
    from the ``(size, url)`` pairs returned by ``get_temporary_link``.
    """
    urls = get_temporary_link()

    feed = Podcast()
    feed.name = "ambience"
    feed.description = "ambience"
    feed.website = "LINK HERE"
    feed.explicit = True

    # Episodes are numbered from 1 in the order the links were returned.
    for number, (size, url) in enumerate(urls, start=1):
        track = Episode()
        track.title = "ambience music {}".format(number)
        track.media = Media(url, size=size, type="audio/mpeg")
        feed.episodes.append(track)

    return Response(str(feed), mimetype='text/xml')
def test_mandatoryValues(self):
    """Ensure each mandatory Podcast property is enforced on RSS creation.

    For every mandatory property a Podcast is created with that single
    property left unset, and ``_create_rss`` is expected to raise ValueError.
    """
    # (label reported on failure, attribute name on Podcast and fixture)
    fields = (
        ("description", "description"),
        ("title", "name"),
        ("link", "website"),
        ("explicit", "explicit"),
    )
    for skipped in {label for label, _ in fields}:
        podcast = Podcast()
        for label, attribute in fields:
            if label != skipped:
                setattr(podcast, attribute, getattr(self, attribute))
        try:
            self.assertRaises(ValueError, podcast._create_rss)
        except AssertionError as failure:
            # Attach the property under test to the failure.
            message = "The test failed for %s" % skipped
            raise_from(AssertionError(message), failure)
def rssfeed(request, programid):
    """Build the RSS feed for the program identified by its (integer) id.

    1. Fetch all non-deleted episodes of the program from the digas db.
    2. Get the program info from the app db.
    3. Let podgen do the actual XML generation.
    """
    program_id = int(programid)
    wanted_fields = ('program', 'title', 'remark', 'author', 'createdate',
                     'broadcastdate', 'filename', 'filesize', 'duration',
                     'softdel')
    podcasts = (
        DigasPodcast.objects.using('digas')
        .filter(softdel=0, program=program_id)
        .only(*wanted_fields)
        .order_by('-createdate')
    )
    programinfo = ProgramInfo.objects.get(programid=program_id)

    # globalsettings is imported here rather than at module level so that
    # Django does not fail on a missing constance_config table when starting
    # from scratch or setting up a new environment.
    from .models import globalsettings

    p = Podcast(
        name=programinfo.name,
        subtitle=programinfo.subtitle,
        description=programinfo.description,
        website=feed_url(programid),
        explicit=programinfo.explicit,
        category=Category(programinfo.category),
        authors=[globalsettings.owner],
        language=programinfo.language,
        owner=globalsettings.owner,
        feed_url=feed_url(programid),
        new_feed_url=feed_url(programid),
        image=programinfo.image_url,
    )

    for episode in podcasts:
        # The publication date is derived from createdate or broadcastdate.
        pubdate = digas2pubdate(episode.createdate, episode.broadcastdate)
        entry = Episode(
            title=episode.title,
            media=Media(mp3url(episode.filename), episode.filesize),
            # multifeedreader relies on the link being set.
            link=mp3url(episode.filename),
            id=guid(episode.filename),
            summary=episode.remark,
            publication_date=pubdate,
        )
        p.episodes.append(entry)

    # Render the feed as unicode text.
    rss = u'%s' % p
    return HttpResponse(rss, content_type='application/xml')
def get_podcast(self):
    """Download the album metadata and all tracks, then write the RSS feed.

    Nothing is written when the album API does not answer with
    ``ret == 200``. Otherwise every page of the track list is fetched, each
    track is added through ``self.get_episode`` and the feed is stored at
    ``./podcast/ximalaya/<album_id>.xml``.
    """
    webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
    album_info = json.loads(webpage.decode('utf-8'))
    if album_info['ret'] != 200:
        return

    album_info_data = album_info['data']
    main_info = album_info_data['mainInfo']

    self.podcast = Podcast()
    self.podcast.name = main_info['albumTitle']
    self.podcast.website = self.album_url.format(self.album_id)
    # A description is mandatory for RSS generation, so fall back to the
    # album title when the album has no intro text.
    self.podcast.description = main_info['richIntro'] or self.podcast.name
    # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
    self.podcast.language = 'zh-cn'
    # The cover is protocol-relative; the part after '!' is dropped.
    self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
    self.podcast.generator = 'kanemori.getpodcast'
    self.podcast.explicit = False
    self.podcast.withhold_from_itunes = True

    # Integer ceiling division gives exactly the number of pages needed,
    # under floor or true division alike, without an extra empty request.
    track_total = album_info_data['tracksInfo']['trackTotalCount']
    album_page_count = (track_total + self.episode_pre_page - 1) // self.episode_pre_page
    for page_num in range(1, album_page_count + 1):
        webpage = tools.get_url(
            self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
            self.headers)
        album_list = json.loads(webpage.decode('utf-8'))
        for episode_info in album_list['data']['tracksAudioPlay']:
            # get_episode appends the episode to self.podcast itself.
            self.get_episode(episode_info['trackId'])

    path = './podcast/ximalaya'
    if not os.path.exists(path):
        os.makedirs(path)
    self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
    print("「{}」が上手に焼きました".format(self.album_id))
def setUp(self):
    """Create a minimal feed with three episodes and silence warnings."""
    self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
    self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

    self.title = 'Some Testfeed'
    self.link = 'http://lernfunk.de'
    self.description = 'A cool tent'
    self.explicit = False

    feed = Podcast()
    feed.name = self.title
    feed.website = self.link
    feed.description = self.description
    feed.explicit = self.explicit

    # First episode: created through the feed; kept on the test case.
    first = feed.add_episode()
    first.id = 'http://lernfunk.de/media/654321/1'
    first.title = 'The First Episode'
    self.fe = first

    # Second episode: appended to the episode list directly, then filled in.
    second = Episode()
    feed.episodes.append(second)
    second.id = 'http://lernfunk.de/media/654321/1'
    second.title = 'The Second Episode'

    # Third episode: created through the feed again.
    third = feed.add_episode()
    third.id = 'http://lernfunk.de/media/654321/1'
    third.title = 'The Third Episode'

    self.fg = feed

    # Trigger every warning, but replace the printer with a no-op.
    warnings.simplefilter("always")
    warnings.showwarning = lambda *args, **kwargs: None
def scrape_morning_edition(
        web_session=requests_html.HTMLSession(), params=params):
    """Return the NPR Morning Edition feed as a non-minimized RSS string.

    NOTE(review): both defaults are evaluated once, when the function is
    defined, so the default session is shared between calls.
    """
    description = (
        "Every weekday for over three decades, Morning Edition has taken "
        "listeners around the country and the world with two hours of "
        "multi-faceted stories and commentaries that inform, challenge and "
        "occasionally amuse. Morning Edition is the most listened-to news "
        "radio program in the country."
    )

    feed = Podcast()
    feed.name = "NPR Morning Edition"
    feed.description = description
    feed.website = "https://www.npr.org/programs/morning-edition"
    feed.explicit = False

    # scrape() is defined elsewhere; presumably it adds episodes to the feed.
    scrape(web_session, params, 'morning-edition', feed)

    return feed.rss_str(minimize=False)
def rss(url_token):
    """Serve the feed belonging to ``url_token`` as XML.

    The token is resolved to a Dropbox access token plus feed title and
    description; each ``(size, url, uid, name)`` tuple returned by
    ``get_temporary_link`` becomes one episode.
    """
    token_info = get_the_latest_token_info(url_token)
    dropbox_access_token, title, description = token_info
    links = get_temporary_link(dropbox_access_token)

    feed = Podcast()
    feed.name = title
    feed.description = description
    feed.website = "https://www.google.com"
    feed.explicit = True

    for size, url, uid, name in links:
        track = Episode()
        # The episode title is the file name without its extension.
        track.title = os.path.splitext(name)[0]
        track.id = uid
        track.media = Media(url, size=size, type="audio/mpeg")
        feed.episodes.append(track)

    return Response(str(feed), mimetype='text/xml')
def genero_feed(puntateList):
    """Write the "Pascal" feed to ``rssfile`` from a list of episode tuples.

    Each tuple holds title, link, date as ``DD/MM/YYYY`` (or a falsy value),
    media URL and media size. Nothing is written for an empty list.
    """
    if not puntateList:
        return

    def data_pubblicazione(testo):
        # Dated episodes are published at 20:00 UTC on their day; undated
        # ones get the current UTC time.
        if testo:
            campi = testo.split("/")
            return datetime.datetime(int(campi[2]), int(campi[1]),
                                     int(campi[0]), 20, 00, tzinfo=pytz.utc)
        return pytz.utc.localize(datetime.datetime.utcnow())

    # Create the podcast.
    p = Podcast()
    p.name = "Pascal Rai Radio 2"
    p.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    p.website = "http://www.raiplayradio.it/programmi/pascal/"
    p.explicit = True
    p.image = "https://rss.draghetti.it/pascal_image.jpg"
    p.feed_url = "https://rss.draghetti.it/pascal.xml"
    p.copyright = "Rai Radio 2"
    p.language = "it-IT"

    for puntata in puntateList:
        episode = Episode()
        # Non-ASCII characters are dropped from the title.
        episode.title = puntata[0].encode("ascii", "ignore")
        episode.link = puntata[1]
        # The file size is approximate.
        episode.media = Media(puntata[3], puntata[4])
        episode.publication_date = data_pubblicazione(puntata[2])
        p.episodes.append(episode)

    # Write the feed to the path held by the module-level ``rssfile``.
    p.rss_file(rssfile, minimize=False)
def setUp(self):
    """Switch to the C locale, record fixture values and build the feed.

    Every fixture value is stored on the test case and then copied onto a
    fresh Podcast, which is exposed as ``self.fg``.
    """
    # Remember the active locale before forcing 'C'.
    self.existing_locale = locale.setlocale(locale.LC_ALL, None)
    locale.setlocale(locale.LC_ALL, 'C')

    # XML namespaces.
    self.nsContent = "http://purl.org/rss/1.0/modules/content/"
    self.nsDc = "http://purl.org/dc/elements/1.1/"
    self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    # Fixture values.
    self.feed_url = "http://example.com/feeds/myfeed.rss"
    self.name = 'Some Testfeed'
    # A non-ASCII character is used on purpose to catch encoding errors.
    self.author = Person('Jon Døll', '*****@*****.**')
    self.website = 'http://example.com'
    self.description = 'This is a cool feed!'
    self.subtitle = 'Coolest of all'
    self.language = 'en'
    self.cloudDomain = 'example.com'
    self.cloudPort = '4711'
    self.cloudPath = '/ws/example'
    self.cloudRegisterProcedure = 'registerProcedure'
    self.cloudProtocol = 'SOAP 1.1'
    self.pubsubhubbub = "http://pubsubhubbub.example.com/"
    self.contributor = {
        'name': "Contributor Name",
        'email': 'Contributor email',
    }
    self.copyright = "The copyright notice"
    self.docs = 'http://www.rssboard.org/rss-specification'
    self.skip_days = {'Tuesday'}
    self.skip_hours = {23}
    self.explicit = False
    self.programname = podgen.version.name
    self.web_master = Person(email='*****@*****.**')
    self.image = "http://example.com/static/podcast.png"
    self.owner = self.author
    self.complete = True
    self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
    self.xslt = "http://example.com/feed/stylesheet.xsl"

    # Copy the fixture values onto the feed, in the original order.
    feed = Podcast()
    for attribute in ('name', 'website', 'description', 'subtitle',
                      'language'):
        setattr(feed, attribute, getattr(self, attribute))
    feed.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                  self.cloudRegisterProcedure, self.cloudProtocol)
    for attribute in ('pubsubhubbub', 'copyright'):
        setattr(feed, attribute, getattr(self, attribute))
    feed.authors.append(self.author)
    for attribute in ('skip_days', 'skip_hours', 'web_master', 'feed_url',
                      'explicit', 'image', 'owner', 'complete',
                      'new_feed_url', 'xslt'):
        setattr(feed, attribute, getattr(self, attribute))
    self.fg = feed

    # Trigger every warning, but replace the printer with a no-op.
    warnings.simplefilter("always")
    warnings.showwarning = lambda *args, **kwargs: None
class Ximalaya:
    """Builds a podcast RSS feed for one ximalaya.com album."""

    def __init__(self, album_id):
        """Store the album id and the URL templates used by the scraper."""
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        # Number of tracks requested per page of the track list.
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        """Download the album metadata and all tracks, then write the feed.

        Nothing is written when the album API does not answer with
        ``ret == 200``. Otherwise every page of the track list is fetched,
        each track is added through ``get_episode`` and the feed is stored
        at ``./podcast/ximalaya/<album_id>.xml``.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            return

        album_info_data = album_info['data']
        main_info = album_info_data['mainInfo']

        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        # A description is mandatory for RSS generation, so fall back to the
        # album title when the album has no intro text.
        self.podcast.description = main_info['richIntro'] or self.podcast.name
        # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
        self.podcast.language = 'zh-cn'
        # The cover is protocol-relative; the part after '!' is dropped.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        # Integer ceiling division gives exactly the number of pages needed,
        # under floor or true division alike, without an extra empty request.
        track_total = album_info_data['tracksInfo']['trackTotalCount']
        album_page_count = (track_total + self.episode_pre_page - 1) // self.episode_pre_page
        for page_num in range(1, album_page_count + 1):
            webpage = tools.get_url(
                self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                # get_episode appends the episode to self.podcast itself.
                self.get_episode(episode_info['trackId'])

        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)
        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        """Fetch one track's details and add it to the feed as an episode.

        Returns:
            ``(True, "<play url>\\n")`` on success, or
            ``(False, "error url: <detail url>\\n")`` once the retry limit
            is exceeded.
        """
        detail_url = self.episode_detail_url.format(episode_id)
        trycount = 0
        findepisode = False
        while not findepisode:
            if trycount > 0:
                print("再接続中" + str(trycount) + "......")
            if trycount > 1:
                print("error url: " + detail_url + "\n")
                return False, "error url: " + detail_url + "\n"

            webpage = tools.get_url(detail_url, self.headers)
            detail = json.loads(webpage.decode('utf-8'))

            episode = self.podcast.add_episode()
            episode.id = str('ximalaya_' + str(episode_id))
            episode.title = detail['title']
            if 'intro' in detail:
                # Line breaks are turned into the literal text \r and \n.
                episode.summary = detail['intro'].replace('\r', '\\r').replace('\n', '\\n')
            episode.publication_date = tools.publication_time(detail['createdAt'])
            # NOTE(review): the duration is read as milliseconds -- confirm
            # the unit the API reports.
            episode.media = Media(detail['playUrl32'],
                                  duration=timedelta(milliseconds=detail['duration']))
            episode.position = 1
            findepisode = True

            # NOTE(review): findepisode was just set to True, so this retry
            # branch (and the trycount checks above) can never run as
            # written; a failed fetch raises instead of being retried.
            if not findepisode:
                trycount += 1
                print("30秒後に再接続する.......")
                sleep(30)
        return True, detail['playUrl32'] + '\n'
def genero_feed(puntateList):
    """Write the "Il Ruggito del Coniglio" feed to ``rssfile``.

    ``puntateList`` holds one tuple per episode: title, link, date as
    ``DD/MM/YYYY`` (or a falsy value), media URL and media size. An empty
    list produces no output.
    """
    if not puntateList:
        return

    # Create the podcast.
    feed = Podcast()
    feed.name = "Il Ruggito del Coniglio"
    feed.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
    feed.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for puntata in puntateList:
        voce = Episode()
        # Non-ASCII characters are dropped from the title.
        voce.title = puntata[0].encode("ascii", "ignore")
        voce.link = puntata[1]
        # The file size is approximate.
        voce.media = Media(puntata[3], puntata[4])

        data = puntata[2]
        if not data:
            # No date available: use the current UTC time.
            voce.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())
        else:
            # Dated episodes are published at 10:00 UTC on their day.
            pezzi = data.split("/")
            voce.publication_date = datetime.datetime(
                int(pezzi[2]), int(pezzi[1]), int(pezzi[0]),
                10, 00, tzinfo=pytz.utc)
        feed.episodes.append(voce)

    # Write the feed to the path held by the module-level ``rssfile``.
    feed.rss_file(rssfile, minimize=False)
def generate_rss_from_articles(feed_settings, articles):
    """Build a podgen Podcast from feed settings and a set of articles.

    :param feed_settings: object providing ``title``, ``author`` (a mapping
        with ``name`` and ``email``), ``source_page_url``, ``copyright``,
        ``subtitle``, ``output_url`` and ``img_url``
    :param articles: iterable of objects providing ``link``, ``title``,
        ``description``, ``pub_date``, ``media`` and ``type``
    :return: the populated Podcast; ``last_updated`` and
        ``publication_date`` are the newest article date, or 2000-01-01
        when no article is newer
    """
    owner = Person(feed_settings.author['name'],
                   feed_settings.author['email'])

    # Feed-level metadata.
    podcast = Podcast()
    podcast.name = feed_settings.title
    podcast.authors.append(owner)
    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    # The subtitle doubles as description and summary.
    for field in ('description', 'summary', 'subtitle'):
        setattr(podcast, field, feed_settings.subtitle)
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False
    podcast.owner = owner

    # Track the newest publication date, starting from a date far in the past.
    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    newest = datetime.datetime(2000, 1, 1, 0, 0).astimezone(vt_tz)

    for article in articles:
        entry = podcast.add_episode()
        entry.id = article.link
        entry.title = article.title
        entry.summary = article.description
        entry.link = article.link
        entry.publication_date = article.pub_date
        if article.pub_date > newest:
            newest = article.pub_date
        entry.media = Media(article.media, size=None, duration=None,
                            type=article.type)

    podcast.last_updated = newest
    podcast.publication_date = newest
    return podcast
class Ximalaya():
    """Builds a podcast RSS feed for one ximalaya.com album."""

    def __init__(self, album_id):
        """Store the album id and derive URLs and request headers from it."""
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Scrape the album page, fetch every track and write the RSS feed.

        Feed metadata comes from the album HTML, the track list from
        ``self.album_list_api`` and per-track details from the
        ``/tracks/<id>.json`` endpoint. A track that fails is reported and
        skipped so the remaining tracks still reach the feed, which is
        written to ``ximalaya/<album_id>.rss``.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the feed-level metadata.
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        # Fall back to the album name when the page has no (non-empty) intro.
        intro = soup.find('div', 'album-intro')
        intro_text = intro.get_text() if intro else ''
        self.podcast.description = intro_text if intro_text else self.podcast.name
        # 'cn' is a country code, not a language code; Chinese is 'zh-cn'.
        self.podcast.language = 'zh-cn'
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])

                # GIF/BMP covers are replaced by the album cover.
                image = each['trackCoverPath'].split('!')[0]
                if image.endswith(('.gif', '.bmp')):
                    episode.image = self.podcast.image
                else:
                    episode.image = image

                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                # Positions count down as the track index counts up.
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

        # Write the generated feed.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time_until_now, created_at):
        """Rebuild a full timestamp from a relative age and a year-less date.

        Args:
            time_until_now: Relative age text such as "3年前" (3 years ago),
                "2月前" (2 months ago) or "5天前" (5 days ago).
            created_at: Month/day/time text such as "12月11日 17:00".

        Returns:
            A datetime tagged as UTC whose year is inferred from the
            relative age and whose month, day, hour and minute come from
            ``created_at``.
        """
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        # Without a recognised age marker, the current year is assumed.
        reduction_year = datetime.now().year
        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = (datetime.now(tzlocal()) - relativedelta(years=year))
            # When the humanized age disagrees with the stated number of
            # years, step one more year back.
            if humanize_time(reduction) != ('%s years' % year):
                reduction_year = (datetime.now(tzlocal()) - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(days=day)).year
        # The fifth positional argument is the minute; passing date.second
        # here (always 0 for this format) discarded the parsed minutes.
        return datetime(reduction_year, date.month, date.day, date.hour, date.minute, tzinfo=pytz.utc)
def genero_feed(puntateList):
    """Generate the "Pascal" RSS file from the scraped episode tuples.

    Every tuple provides, in order: title, link, date as ``DD/MM/YYYY``
    (or a falsy value), media URL and media size. With an empty list the
    function does nothing.
    """
    if not puntateList:
        return

    # Create the podcast.
    podcast = Podcast()
    podcast.name = "Pascal Rai Radio 2"
    podcast.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    podcast.website = "http://www.raiplayradio.it/programmi/pascal/"
    podcast.explicit = True
    podcast.image = "https://rss.draghetti.it/pascal_image.jpg"
    podcast.feed_url = "https://rss.draghetti.it/pascal.xml"
    podcast.copyright = "Rai Radio 2"
    podcast.language = "it-IT"

    for titolo, link, data, media_url, media_size in (
            (riga[0], riga[1], riga[2], riga[3], riga[4])
            for riga in puntateList):
        item = Episode()
        # Non-ASCII characters are dropped from the title.
        item.title = titolo.encode("ascii", "ignore")
        item.link = link
        # The file size is approximate.
        item.media = Media(media_url, media_size)
        if data:
            # Dated episodes are published at 20:00 UTC on their day.
            campi = data.split("/")
            quando = datetime.datetime(
                int(campi[2]), int(campi[1]), int(campi[0]),
                20, 00, tzinfo=pytz.utc)
        else:
            quando = pytz.utc.localize(datetime.datetime.utcnow())
        item.publication_date = quando
        podcast.episodes.append(item)

    # Write the feed to the path held by the module-level ``rssfile``.
    podcast.rss_file(rssfile, minimize=False)
class Ximalaya():
    """Turn one ximalaya.com album into a podcast RSS file.

    ``album()`` drives the whole job: it signs the request headers, reads the
    album metadata, walks the paginated track list and finally writes
    ``ximalaya/<album_id>.rss``.
    """

    def __init__(self, album_id):
        """Store the album id and prepare URL templates, session and headers."""
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        """Fetch the album and write its RSS feed.

        Returns early, leaving ``self.podcast`` untouched and writing no
        file, when the album-info endpoint does not answer ``ret == 200``.
        """
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            # Without album metadata there is nothing to build a feed from;
            # stop here instead of failing further down on missing data.
            return
        album_info_data = album_info_content['data']
        main_info = album_info_data['mainInfo']

        # Initialise the feed-level metadata
        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        else:
            # Fall back to the title when the album has no introduction
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # Keep only the part of the cover URL before the first '!'
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        track_total = album_info_data['tracksInfo']['trackTotalCount']
        # Integer ceiling division gives the exact page count on Python 2 and
        # Python 3 alike, so no request is made for a page past the end.
        page_count = -(-track_total // self.page_size)
        for page_num in range(1, page_count + 1):
            self.header["Host"] = "www.ximalaya.com"
            album_list = self.s.get(self.album_list_url.format(
                self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                self._add_episode(each, count)

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                              minimize=True)

    def _add_episode(self, each, count):
        """Add one episode built from a track-list entry.

        :param each: one item of ``tracksAudioPlay``.
        :param count: number of tracks on the page ``each`` came from.

        Any failure is printed together with the track URL and swallowed, so
        a single bad track does not abort the whole album.
        """
        try:
            self.header["Host"] = "mobile.ximalaya.com"
            detail = requests.get(self.detail_url.format(each['trackId']),
                                  headers=self.header).content
            detail_content = json.loads(detail.decode('utf-8'))

            episode = self.podcast.add_episode()
            episode.id = str(each['index'])
            episode.title = each['trackName']
            print(self.podcast.name + '=====' + each['trackName'])

            image = each['trackCoverPath'].split('!')[0]
            if image.endswith(('.png', '.jpg')):
                episode.image = 'https:' + image
            else:
                # Any other cover format falls back to the album cover
                episode.image = self.podcast.image

            if 'intro' in detail_content:
                episode.summary = detail_content['intro'].replace('\r\n', '')
            else:
                episode.summary = each['trackName']
            episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(
                detail_content['createdAt'])
            episode.media = Media(each['src'], each['duration'])
            episode.position = count - each['index'] + 1
        except Exception as e:
            print('异常:', e)
            print('异常 URL:',
                  'https://www.ximalaya.com%s' % each['trackUrl'])
            traceback.print_exc()

    def get_time(self):
        """Return the server timestamp, as the text body of ``time_api``."""
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """Compute the ``xm-sign`` header and store it in ``self.header``.

        Layout of the value::

            md5("himalaya-" + server_ts) (rand) server_ts (rand) local_ts_ms

        where each ``rand`` is a rounded random number between 0 and 100 and
        ``local_ts_ms`` is the local clock in milliseconds.
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()
        digest = hashlib.md5(
            "himalaya-{}".format(server_time).encode()).hexdigest()
        sign = (str(digest)
                + "({})".format(str(round(random.random() * 100)))
                + server_time
                + "({})".format(str(round(random.random() * 100)))
                + now_time)
        self.header["xm-sign"] = sign

    @staticmethod
    def reduction_time(time):
        """Convert an epoch timestamp in milliseconds to a UTC datetime.

        The value is interpreted as UTC directly, so the result does not
        depend on the time zone of the machine running the script. Seconds
        and microseconds are dropped; the result has minute resolution.

        :param time: milliseconds since the Unix epoch.
        :return: timezone-aware ``datetime`` with ``tzinfo=pytz.utc``.
        """
        moment = datetime.fromtimestamp(time / 1000, pytz.utc)
        return moment.replace(second=0, microsecond=0)
def setUp(self):
    """Create the reference values and a fully populated ``Podcast``.

    The reference values live on ``self`` so the assertions can compare
    against them; the podcast built from them is stored as ``self.fg``.
    Warning display is silenced at the end.
    """
    podcast = Podcast()

    # XML namespaces used when inspecting the generated feed
    self.nsContent = "http://purl.org/rss/1.0/modules/content/"
    self.nsDc = "http://purl.org/dc/elements/1.1/"
    self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    # Reference values
    self.feed_url = "http://example.com/feeds/myfeed.rss"
    self.name = 'Some Testfeed'
    self.author = Person('John Doe', '*****@*****.**')
    self.website = 'http://example.com'
    self.description = 'This is a cool feed!'
    self.subtitle = 'Coolest of all'
    self.language = 'en'
    self.cloudDomain = 'example.com'
    self.cloudPort = '4711'
    self.cloudPath = '/ws/example'
    self.cloudRegisterProcedure = 'registerProcedure'
    self.cloudProtocol = 'SOAP 1.1'
    self.pubsubhubbub = "http://pubsubhubbub.example.com/"
    self.contributor = {
        'name': "Contributor Name",
        'email': 'Contributor email',
    }
    self.copyright = "The copyright notice"
    self.docs = 'http://www.rssboard.org/rss-specification'
    self.skip_days = {'Tuesday'}
    self.skip_hours = {23}
    self.explicit = False
    self.programname = podgen.version.name
    self.web_master = Person(email='*****@*****.**')
    self.image = "http://example.com/static/podcast.png"
    self.owner = self.author
    self.complete = True
    self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
    self.xslt = "http://example.com/feed/stylesheet.xsl"

    # Copy the reference values onto the podcast, keeping a fixed order
    for attribute in ("name", "website", "description", "subtitle",
                      "language"):
        setattr(podcast, attribute, getattr(self, attribute))
    podcast.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                     self.cloudRegisterProcedure, self.cloudProtocol)
    podcast.pubsubhubbub = self.pubsubhubbub
    podcast.copyright = self.copyright
    podcast.authors.append(self.author)
    for attribute in ("skip_days", "skip_hours", "web_master", "feed_url",
                      "explicit", "image", "owner", "complete",
                      "new_feed_url", "xslt"):
        setattr(podcast, attribute, getattr(self, attribute))
    self.fg = podcast

    # Let every warning through the filters, but display none of them
    warnings.simplefilter("always")

    def _discard(*args, **kwargs):
        pass

    warnings.showwarning = _discard
def album(self):
    """Fetch the album and write its RSS feed to ``ximalaya/<id>.rss``.

    Reads the album metadata, then walks the paginated track list and adds
    one episode per track. A failure on a single track is printed and
    skipped.

    Returns early, leaving ``self.podcast`` untouched and writing no file,
    when the album-info endpoint does not answer ``ret == 200``.
    """
    album_info = requests.get(self.album_info_url.format(self.album_id),
                              headers=self.header).content
    album_info_content = json.loads(album_info.decode('utf-8'))
    if album_info_content['ret'] != 200:
        # Without album metadata there is nothing to build a feed from;
        # stop here instead of failing further down on missing data.
        return
    album_info_data = album_info_content['data']
    main_info = album_info_data['mainInfo']

    # Initialise the feed-level metadata
    self.podcast = Podcast()
    self.podcast.name = main_info['albumTitle']
    self.podcast.authors.append(
        Person("Powered by forecho", '*****@*****.**'))
    self.podcast.website = self.album_url.format(self.album_id)
    self.podcast.copyright = 'cc-by'
    if main_info['richIntro']:
        self.podcast.description = main_info['richIntro']
    else:
        # Fall back to the title when the album has no introduction
        self.podcast.description = self.podcast.name
    self.podcast.language = 'cn'
    # Keep only the part of the cover URL before the first '!'
    self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
    self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
    self.podcast.category = Category('Technology', 'Podcasting')
    self.podcast.explicit = False
    self.podcast.complete = False
    self.podcast.owner = Person("forecho", '*****@*****.**')

    track_total = album_info_data['tracksInfo']['trackTotalCount']
    # Integer ceiling division gives the exact page count on Python 2 and
    # Python 3 alike, so no request is made for a page past the end.
    page_count = -(-track_total // self.page_size)
    for page_num in range(1, page_count + 1):
        album_list = requests.get(self.album_list_url.format(
            self.album_id, page_num, self.page_size),
            headers=self.header).content
        album_list_content = json.loads(album_list.decode('utf-8'))
        tracks = album_list_content['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                detail = requests.get(
                    self.detail_url.format(each['trackId']),
                    headers=self.header).content
                detail_content = json.loads(detail.decode('utf-8'))

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])

                image = each['trackCoverPath'].split('!')[0]
                if image.endswith(('.png', '.jpg')):
                    episode.image = 'https:' + image
                else:
                    # Any other cover format falls back to the album cover
                    episode.image = self.podcast.image

                if 'intro' in detail_content:
                    episode.summary = detail_content['intro'].replace(
                        '\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    detail_content['createdAt'])
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the track and carry on with the next
                print('异常:', e)
                print('异常 URL:',
                      'https://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

    # Write the feed file
    self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
class TestPodcast(unittest.TestCase):
    """Tests for ``Podcast``: attribute handling and the generated RSS."""

    def setUp(self):
        """Create reference values and a fully populated ``self.fg``.

        The locale is switched to ``C`` (restored in ``tearDown``) and
        warning display is silenced. The warnings configuration is restored
        automatically after each test, so it does not leak into other tests.
        """
        self.existing_locale = locale.setlocale(locale.LC_ALL, None)
        locale.setlocale(locale.LC_ALL, 'C')

        fg = Podcast()

        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        self.name = 'Some Testfeed'

        # Use character not in ASCII to catch encoding errors
        self.author = Person('Jon Døll', '*****@*****.**')

        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'

        self.language = 'en'

        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = {
            'name': "Contributor Name",
            'email': 'Contributor email'
        }
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = set(['Tuesday'])
        self.skip_hours = set([23])

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        fg.name = self.name
        fg.website = self.website
        fg.description = self.description
        fg.subtitle = self.subtitle
        fg.language = self.language
        fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                    self.cloudRegisterProcedure, self.cloudProtocol)
        fg.pubsubhubbub = self.pubsubhubbub
        fg.copyright = self.copyright
        fg.authors.append(self.author)
        fg.skip_days = self.skip_days
        fg.skip_hours = self.skip_hours
        fg.web_master = self.web_master
        fg.feed_url = self.feed_url
        fg.explicit = self.explicit
        fg.image = self.image
        fg.owner = self.owner
        fg.complete = self.complete
        fg.new_feed_url = self.new_feed_url
        fg.xslt = self.xslt

        self.fg = fg

        # Snapshot the warnings configuration (filters and showwarning) so
        # the changes below are undone once the test has finished.
        warnings_state = warnings.catch_warnings()
        warnings_state.__enter__()
        self.addCleanup(warnings_state.__exit__, None, None, None)

        warnings.simplefilter("always")

        def noop(*args, **kwargs):
            pass

        warnings.showwarning = noop

    def tearDown(self):
        """Restore the locale that was active before ``setUp``."""
        locale.setlocale(locale.LC_ALL, self.existing_locale)

    def test_constructor(self):
        """Keyword arguments to the constructor set the same attributes."""
        # Overwrite fg from setup
        self.fg = Podcast(
            name=self.name,
            website=self.website,
            description=self.description,
            subtitle=self.subtitle,
            language=self.language,
            cloud=(self.cloudDomain, self.cloudPort, self.cloudPath,
                   self.cloudRegisterProcedure, self.cloudProtocol),
            pubsubhubbub=self.pubsubhubbub,
            copyright=self.copyright,
            authors=[self.author],
            skip_days=self.skip_days,
            skip_hours=self.skip_hours,
            web_master=self.web_master,
            feed_url=self.feed_url,
            explicit=self.explicit,
            image=self.image,
            owner=self.owner,
            complete=self.complete,
            new_feed_url=self.new_feed_url,
            xslt=self.xslt,
        )
        # Test that the fields are actually set
        self.test_baseFeed()

    def test_constructorUnknownAttributes(self):
        """Unknown keywords and positional arguments raise ``TypeError``."""
        self.assertRaises(TypeError, Podcast, naem="Oh, looks like a typo")
        self.assertRaises(TypeError, Podcast, "Haha, No Keyword")

    def test_baseFeed(self):
        """Every attribute of ``self.fg`` equals its reference value."""
        fg = self.fg

        assert fg.name == self.name

        assert fg.authors[0] == self.author
        assert fg.web_master == self.web_master

        assert fg.website == self.website

        assert fg.description == self.description
        assert fg.subtitle == self.subtitle

        assert fg.language == self.language
        assert fg.feed_url == self.feed_url
        assert fg.image == self.image
        assert fg.owner == self.owner
        assert fg.complete == self.complete
        assert fg.pubsubhubbub == self.pubsubhubbub
        assert fg.cloud == (self.cloudDomain, self.cloudPort, self.cloudPath,
                            self.cloudRegisterProcedure, self.cloudProtocol)
        assert fg.copyright == self.copyright
        assert fg.new_feed_url == self.new_feed_url
        assert fg.skip_days == self.skip_days
        assert fg.skip_hours == self.skip_hours
        assert fg.xslt == self.xslt

    def test_rssFeedFile(self):
        """The feed written to a file passes ``checkRssString``."""
        fg = self.fg
        rssString = self.getRssFeedFileContents(
            fg, xml_declaration=False).replace('\n', '')
        self.checkRssString(rssString)

    def getRssFeedFileContents(self, fg, **kwargs):
        """Write ``fg`` to a temporary file and return the file's text.

        Extra keyword arguments are forwarded to ``fg.rss_file``. The
        temporary file is removed before returning, also on failure.
        """
        # Keep track of our temporary file and its filename
        filename = None
        file = None
        encoding = 'UTF-8'
        try:
            # Get our temporary file name
            file = tempfile.NamedTemporaryFile(delete=False)
            filename = file.name
            # Close the file; we will just use its name
            file.close()
            # Write the RSS to the file (overwriting it)
            fg.rss_file(filename=filename, encoding=encoding, **kwargs)
            # Read the resulting RSS
            with open(filename, "r", encoding=encoding) as myfile:
                rssString = myfile.read()
        finally:
            # We don't need the file any longer, so delete it
            if filename:
                os.unlink(filename)
            elif file:
                # Oops, we were interrupted between the first and second stmt
                filename = file.name
                file.close()
                os.unlink(filename)
            else:
                # We were interrupted between entering the try-block and
                # getting the temporary file. Not much we can do.
                pass
        return rssString

    def test_rssFeedString(self):
        """The feed returned as a string passes ``checkRssString``."""
        fg = self.fg
        rssString = fg.rss_str(xml_declaration=False)
        self.checkRssString(rssString)

    def test_rssStringAndFileAreEqual(self):
        """``rss_str`` and ``rss_file`` produce the same text."""
        rss_string = self.fg.rss_str()
        rss_file = self.getRssFeedFileContents(self.fg)
        self.assertEqual(rss_string, rss_file)

    def checkRssString(self, rssString):
        """Parse ``rssString`` and compare the channel with the references."""
        feed = etree.fromstring(rssString)
        nsAtom = "http://www.w3.org/2005/Atom"

        channel = feed.find("channel")
        assert channel is not None

        assert channel.find("title").text == self.name
        assert channel.find("description").text == self.description
        assert channel.find("{%s}subtitle" % self.nsItunes).text == \
            self.subtitle
        assert channel.find("link").text == self.website
        assert channel.find("lastBuildDate").text is not None
        assert channel.find("language").text == self.language
        assert channel.find(
            "docs").text == "http://www.rssboard.org/rss-specification"
        assert self.programname in channel.find("generator").text
        assert channel.find("cloud").get('domain') == self.cloudDomain
        assert channel.find("cloud").get('port') == self.cloudPort
        assert channel.find("cloud").get('path') == self.cloudPath
        assert channel.find("cloud").get(
            'registerProcedure') == self.cloudRegisterProcedure
        assert channel.find("cloud").get('protocol') == self.cloudProtocol
        assert channel.find("copyright").text == self.copyright
        assert channel.find("docs").text == self.docs
        assert self.author.email in channel.find("managingEditor").text
        assert channel.find("skipDays").find("day").text in self.skip_days
        assert int(
            channel.find("skipHours").find("hour").text) in self.skip_hours
        assert self.web_master.email in channel.find("webMaster").text

        links = channel.findall("{%s}link" % nsAtom)
        selflinks = [link for link in links if link.get('rel') == 'self']
        hublinks = [link for link in links if link.get('rel') == 'hub']

        assert selflinks, "No <atom:link rel='self'> element found"
        selflink = selflinks[0]
        assert selflink.get('href') == self.feed_url
        assert selflink.get('type') == 'application/rss+xml'

        assert hublinks, "No <atom:link rel='hub'> element found"
        hublink = hublinks[0]
        assert hublink.get('href') == self.pubsubhubbub
        assert hublink.get('type') is None

        assert channel.find("{%s}image" % self.nsItunes).get('href') == \
            self.image
        owner = channel.find("{%s}owner" % self.nsItunes)
        assert owner.find("{%s}name" % self.nsItunes).text == self.owner.name
        assert owner.find("{%s}email" % self.nsItunes).text == self.owner.email
        assert channel.find("{%s}complete" % self.nsItunes).text.lower() == \
            "yes"
        assert channel.find("{%s}new-feed-url" % self.nsItunes).text == \
            self.new_feed_url

    def test_feedUrlValidation(self):
        """A feed URL without a scheme is rejected with ``ValueError``."""
        self.assertRaises(ValueError, setattr, self.fg, "feed_url",
                          "example.com/feed.rss")

    def test_generator(self):
        """The generator text reflects how the generator was configured."""
        software_name = "My Awesome Software"
        software_version = (1, 0)
        software_url = "http://example.com/awesomesoft/"

        # Using set_generator, text includes python-podgen
        self.fg.set_generator(software_name)
        rss = self.fg._create_rss()
        generator = rss.find("channel").find("generator").text
        assert software_name in generator
        assert self.programname in generator

        # Using set_generator, text excludes python-podgen
        self.fg.set_generator(software_name, exclude_podgen=True)
        generator = self.fg._create_rss().find("channel").find(
            "generator").text
        assert software_name in generator
        assert self.programname not in generator

        # Using set_generator, text includes name, version and url
        self.fg.set_generator(software_name, software_version, software_url)
        generator = self.fg._create_rss().find("channel").find(
            "generator").text
        assert software_name in generator
        assert str(software_version[0]) in generator
        assert str(software_version[1]) in generator
        assert software_url in generator

        # Using generator directly, text excludes python-podgen
        self.fg.generator = software_name
        generator = self.fg._create_rss().find("channel").find(
            "generator").text
        assert software_name in generator
        assert self.programname not in generator

    def test_str(self):
        """``str(podcast)`` equals the non-minimized UTF-8 feed string."""
        assert str(self.fg) == self.fg.rss_str(minimize=False,
                                               encoding="UTF-8",
                                               xml_declaration=True)

    def test_updated(self):
        """``lastBuildDate`` has a default, honours a value, and can be off."""
        date = datetime.datetime(2016, 1, 1, 0, 10, tzinfo=dateutil.tz.tzutc())

        def getLastBuildDateElement(fg):
            return fg._create_rss().find("channel").find("lastBuildDate")

        # Test that it has a default
        assert getLastBuildDateElement(self.fg) is not None

        # Test that it respects my custom value
        self.fg.last_updated = date
        lastBuildDate = getLastBuildDateElement(self.fg)
        assert lastBuildDate is not None
        assert dateutil.parser.parse(lastBuildDate.text) == date

        # Test that it is left out when set to False
        self.fg.last_updated = False
        lastBuildDate = getLastBuildDateElement(self.fg)
        assert lastBuildDate is None

    def test_AuthorEmail(self):
        """An author with only an email ends up in managingEditor alone."""
        # Just email - so use managingEditor, not dc:creator or itunes:author
        # This is per the RSS best practices, see the section about dc:creator
        self.fg.authors = [Person(None, "*****@*****.**")]
        channel = self.fg._create_rss().find("channel")
        # managingEditor uses email?
        assert channel.find("managingEditor").text == self.fg.authors[0].email
        # No dc:creator?
        assert channel.find("{%s}creator" % self.nsDc) is None
        # No itunes:author?
        assert channel.find("{%s}author" % self.nsItunes) is None

    def test_AuthorName(self):
        """An author with only a name uses dc:creator and itunes:author."""
        # Just name - use dc:creator and itunes:author, not managingEditor
        self.fg.authors = [Person("Just a. Name")]
        channel = self.fg._create_rss().find("channel")
        # No managingEditor?
        assert channel.find("managingEditor") is None
        # dc:creator equals name?
        assert channel.find("{%s}creator" % self.nsDc).text == \
            self.fg.authors[0].name
        # itunes:author equals name?
        assert channel.find("{%s}author" % self.nsItunes).text == \
            self.fg.authors[0].name

    def test_AuthorNameAndEmail(self):
        """Name plus email uses managingEditor and itunes:author."""
        # Both name and email - use managingEditor and itunes:author,
        # not dc:creator
        self.fg.authors = [Person("Both a name", "*****@*****.**")]
        channel = self.fg._create_rss().find("channel")
        # Does managingEditor follow the pattern "email (name)"?
        self.assertEqual(
            self.fg.authors[0].email + " (" + self.fg.authors[0].name + ")",
            channel.find("managingEditor").text)
        # No dc:creator?
        assert channel.find("{%s}creator" % self.nsDc) is None
        # itunes:author uses name only?
        assert channel.find("{%s}author" % self.nsItunes).text == \
            self.fg.authors[0].name

    def test_multipleAuthors(self):
        """Several authors use dc:creator and itunes:author only."""
        # Multiple authors - use itunes:author and dc:creator, not
        # managingEditor.

        person1 = Person("Multiple", "*****@*****.**")
        person2 = Person("Are", "*****@*****.**")
        self.fg.authors = [person1, person2]
        channel = self.fg._create_rss().find("channel")

        # Test dc:creator
        author_elements = \
            channel.findall("{%s}creator" % self.nsDc)
        author_texts = [e.text for e in author_elements]

        assert len(author_texts) == 2
        assert person1.name in author_texts[0]
        assert person1.email in author_texts[0]
        assert person2.name in author_texts[1]
        assert person2.email in author_texts[1]

        # Test itunes:author
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author is not None
        itunes_author_text = itunes_author.text
        assert person1.name in itunes_author_text
        assert person1.email not in itunes_author_text
        assert person2.name in itunes_author_text
        assert person2.email not in itunes_author_text

        # Test that managingEditor is not used
        assert channel.find("managingEditor") is None

    def test_authorsInvalidValue(self):
        """Assigning a single ``Person`` to ``authors`` raises ``TypeError``."""
        self.assertRaises(TypeError, self.do_authorsInvalidValue)

    def do_authorsInvalidValue(self):
        """Helper: assign a ``Person`` where a list is expected."""
        self.fg.authors = Person("Opsie", "*****@*****.**")

    def test_webMaster(self):
        """``web_master`` needs an email and renders as "email (name)"."""
        self.fg.web_master = Person(None, "*****@*****.**")
        channel = self.fg._create_rss().find("channel")
        assert channel.find("webMaster").text == self.fg.web_master.email

        self.assertRaises(ValueError, setattr, self.fg, "web_master",
                          Person("Mr. No Email Address"))

        self.fg.web_master = Person("Both a name", "*****@*****.**")
        channel = self.fg._create_rss().find("channel")
        # Does webMaster follow the pattern "email (name)"?
        self.assertEqual(
            self.fg.web_master.email + " (" + self.fg.web_master.name + ")",
            channel.find("webMaster").text)

    def test_categoryWithoutSubcategory(self):
        """A category without subcategory yields one itunes:category."""
        c = Category("Arts")
        self.fg.category = c
        channel = self.fg._create_rss().find("channel")
        itunes_category = channel.find("{%s}category" % self.nsItunes)
        assert itunes_category is not None

        self.assertEqual(itunes_category.get("text"), c.category)

        assert itunes_category.find("{%s}category" % self.nsItunes) is None

    def test_categoryWithSubcategory(self):
        """A subcategory is nested inside the itunes:category element."""
        c = Category("Arts", "Food")
        self.fg.category = c
        channel = self.fg._create_rss().find("channel")
        itunes_category = channel.find("{%s}category" % self.nsItunes)
        assert itunes_category is not None
        itunes_subcategory = itunes_category\
            .find("{%s}category" % self.nsItunes)
        assert itunes_subcategory is not None
        self.assertEqual(itunes_subcategory.get("text"), c.subcategory)

    def test_categoryChecks(self):
        """A plain tuple is not accepted as a category."""
        c = ("Arts", "Food")
        self.assertRaises(TypeError, setattr, self.fg, "category", c)

    def test_explicitIsExplicit(self):
        """``explicit = True`` renders an affirmative itunes:explicit."""
        self.fg.explicit = True
        channel = self.fg._create_rss().find("channel")
        itunes_explicit = channel.find("{%s}explicit" % self.nsItunes)
        assert itunes_explicit is not None
        assert itunes_explicit.text.lower() in ("yes", "explicit", "true"),\
            "itunes:explicit was %s, expected yes, explicit or true" \
            % itunes_explicit.text

    def test_explicitIsClean(self):
        """``explicit = False`` renders a negative itunes:explicit."""
        self.fg.explicit = False
        channel = self.fg._create_rss().find("channel")
        itunes_explicit = channel.find("{%s}explicit" % self.nsItunes)
        assert itunes_explicit is not None
        assert itunes_explicit.text.lower() in ("no", "clean", "false"),\
            "itunes:explicit was %s, expected no, clean or false" \
            % itunes_explicit.text

    def test_mandatoryValues(self):
        """Leaving out any one mandatory property raises ``ValueError``."""
        # Try to create a Podcast once for each mandatory property.
        # On each iteration, exactly one of the properties is not set.
        # Therefore, an exception should be thrown on each iteration.
        mandatory_properties = set([
            "description",
            "title",
            "link",
            "explicit",
        ])

        for test_property in mandatory_properties:
            fg = Podcast()
            if test_property != "description":
                fg.description = self.description
            if test_property != "title":
                fg.name = self.name
            if test_property != "link":
                fg.website = self.website
            if test_property != "explicit":
                fg.explicit = self.explicit
            try:
                self.assertRaises(ValueError, fg._create_rss)
            except AssertionError as e:
                # Re-raise with the name of the property that was left out
                raise_from(
                    AssertionError("The test failed for %s" % test_property),
                    e)

    def test_withholdFromItunesOffByDefault(self):
        """``withhold_from_itunes`` is falsy unless set."""
        assert not self.fg.withhold_from_itunes

    def test_withholdFromItunes(self):
        """itunes:block is present only while withholding is enabled."""
        self.fg.withhold_from_itunes = True
        itunes_block = self.fg._create_rss().find("channel")\
            .find("{%s}block" % self.nsItunes)
        assert itunes_block is not None
        self.assertEqual(itunes_block.text.lower(), "yes")

        self.fg.withhold_from_itunes = False
        itunes_block = self.fg._create_rss().find("channel")\
            .find("{%s}block" % self.nsItunes)
        assert itunes_block is None

    def test_modifyingSkipDaysAfterwards(self):
        """An invalid day added in place is caught when the feed is built."""
        self.fg.skip_days.add("Unrecognized day")
        self.assertRaises(ValueError, self.fg.rss_str)
        self.fg.skip_days.remove("Unrecognized day")
        self.fg.rss_str()  # Now it works

    def test_modifyingSkipHoursAfterwards(self):
        """An invalid hour added in place is caught when the feed is built."""
        self.fg.skip_hours.add(26)
        self.assertRaises(ValueError, self.fg.rss_str)
        self.fg.skip_hours.remove(26)
        self.fg.rss_str()  # Now it works

    # Tests for xslt

    def test_xslt_str(self):
        """Run the xslt checks against ``rss_str``."""
        def use_str(**kwargs):
            return self.fg.rss_str(**kwargs)

        self.help_test_xslt_using(use_str)

    def test_xslt_file(self):
        """Run the xslt checks against the feed written to a file."""
        def use_file(**kwargs):
            return self.getRssFeedFileContents(self.fg, **kwargs)

        self.help_test_xslt_using(use_file)

    def help_test_xslt_using(self, generated_feed):
        """Run tests for xslt, generating the feed str using the given function.
        """
        xslt_path = "http://example.com/mystylesheet.xsl"
        xslt_pi = "<?xml-stylesheet"

        # No xslt when set to None
        self.fg.xslt = None
        assert xslt_pi not in generated_feed()
        assert xslt_pi not in generated_feed(minimize=True)
        assert xslt_pi not in generated_feed(xml_declaration=False)

        self.fg.xslt = xslt_path

        # Now we have the stylesheet in there
        assert xslt_pi in generated_feed()
        assert xslt_pi in generated_feed(minimize=True)
        assert xslt_pi in generated_feed(xml_declaration=False)

        assert xslt_path in generated_feed()
        assert xslt_path in generated_feed(minimize=True)
        assert xslt_path in generated_feed(xml_declaration=False)

    def test_imageWarningNoExt(self):
        """An image URL without a file extension warns but is still set."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            self.assertEqual(len(w), 0)

            # Set image to a URL without proper file extension
            no_ext = "http://static.example.com/images/logo"
            self.fg.image = no_ext
            # Did we get a warning?
            self.assertEqual(1, len(w))
            assert issubclass(w.pop().category, NotSupportedByItunesWarning)
            # Was the image set?
            self.assertEqual(no_ext, self.fg.image)

    def test_imageWarningBadExt(self):
        """An unsupported image extension warns but is still set."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")

            # Set image to a URL with an unsupported file extension
            bad_ext = "http://static.example.com/images/logo.gif"
            self.fg.image = bad_ext
            # Did we get a warning?
            self.assertEqual(1, len(w))
            # Was it of the correct type?
            assert issubclass(w.pop().category, NotSupportedByItunesWarning)
            # Was the image still set?
            self.assertEqual(bad_ext, self.fg.image)

    def test_imageNoWarningWithGoodExt(self):
        """Supported image extensions are set without any warning."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")

            # Set image to a URL with a supported file extension
            extensions = ["jpg", "png", "jpeg"]
            for extension in extensions:
                good_ext = "http://static.example.com/images/logo." + extension
                self.fg.image = good_ext
                # Did we get no warning?
                self.assertEqual(
                    0, len(w),
                    "Extension %s raised warnings (%s)" % (extension, w))
                # Was the image set?
                self.assertEqual(good_ext, self.fg.image)
class TestPodcast(unittest.TestCase): def setUp(self): fg = Podcast() self.nsContent = "http://purl.org/rss/1.0/modules/content/" self.nsDc = "http://purl.org/dc/elements/1.1/" self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd" self.feed_url = "http://example.com/feeds/myfeed.rss" self.name = 'Some Testfeed' self.author = Person('John Doe', '*****@*****.**') self.website = 'http://example.com' self.description = 'This is a cool feed!' self.subtitle = 'Coolest of all' self.language = 'en' self.cloudDomain = 'example.com' self.cloudPort = '4711' self.cloudPath = '/ws/example' self.cloudRegisterProcedure = 'registerProcedure' self.cloudProtocol = 'SOAP 1.1' self.pubsubhubbub = "http://pubsubhubbub.example.com/" self.contributor = {'name':"Contributor Name", 'email': 'Contributor email'} self.copyright = "The copyright notice" self.docs = 'http://www.rssboard.org/rss-specification' self.skip_days = set(['Tuesday']) self.skip_hours = set([23]) self.explicit = False self.programname = podgen.version.name self.web_master = Person(email='*****@*****.**') self.image = "http://example.com/static/podcast.png" self.owner = self.author self.complete = True self.new_feed_url = "https://example.com/feeds/myfeed2.rss" self.xslt = "http://example.com/feed/stylesheet.xsl" fg.name = self.name fg.website = self.website fg.description = self.description fg.subtitle = self.subtitle fg.language = self.language fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath, self.cloudRegisterProcedure, self.cloudProtocol) fg.pubsubhubbub = self.pubsubhubbub fg.copyright = self.copyright fg.authors.append(self.author) fg.skip_days = self.skip_days fg.skip_hours = self.skip_hours fg.web_master = self.web_master fg.feed_url = self.feed_url fg.explicit = self.explicit fg.image = self.image fg.owner = self.owner fg.complete = self.complete fg.new_feed_url = self.new_feed_url fg.xslt = self.xslt self.fg = fg warnings.simplefilter("always") def noop(*args, **kwargs): pass 
warnings.showwarning = noop def test_constructor(self): # Overwrite fg from setup self.fg = Podcast( name=self.name, website=self.website, description=self.description, subtitle=self.subtitle, language=self.language, cloud=(self.cloudDomain, self.cloudPort, self.cloudPath, self.cloudRegisterProcedure, self.cloudProtocol), pubsubhubbub=self.pubsubhubbub, copyright=self.copyright, authors=[self.author], skip_days=self.skip_days, skip_hours=self.skip_hours, web_master=self.web_master, feed_url=self.feed_url, explicit=self.explicit, image=self.image, owner=self.owner, complete=self.complete, new_feed_url=self.new_feed_url, xslt=self.xslt, ) # Test that the fields are actually set self.test_baseFeed() def test_constructorUnknownAttributes(self): self.assertRaises(TypeError, Podcast, naem="Oh, looks like a typo") self.assertRaises(TypeError, Podcast, "Haha, No Keyword") def test_baseFeed(self): fg = self.fg assert fg.name == self.name assert fg.authors[0] == self.author assert fg.web_master == self.web_master assert fg.website == self.website assert fg.description == self.description assert fg.subtitle == self.subtitle assert fg.language == self.language assert fg.feed_url == self.feed_url assert fg.image == self.image assert fg.owner == self.owner assert fg.complete == self.complete assert fg.pubsubhubbub == self.pubsubhubbub assert fg.cloud == (self.cloudDomain, self.cloudPort, self.cloudPath, self.cloudRegisterProcedure, self.cloudProtocol) assert fg.copyright == self.copyright assert fg.new_feed_url == self.new_feed_url assert fg.skip_days == self.skip_days assert fg.skip_hours == self.skip_hours assert fg.xslt == self.xslt def test_rssFeedFile(self): fg = self.fg rssString = self.getRssFeedFileContents(fg, xml_declaration=False)\ .replace('\n', '') self.checkRssString(rssString) def getRssFeedFileContents(self, fg, **kwargs): # Keep track of our temporary file and its filename filename = None file = None try: # Get our temporary file name file = 
tempfile.NamedTemporaryFile(delete=False) filename = file.name # Close the file; we will just use its name file.close() # Write the RSS to the file (overwriting it) fg.rss_file(filename=filename, **kwargs) # Read the resulting RSS with open(filename, "r") as myfile: rssString = myfile.read() finally: # We don't need the file any longer, so delete it if filename: os.unlink(filename) elif file: # Ops, we were interrupted between the first and second stmt filename = file.name file.close() os.unlink(filename) else: # We were interrupted between entering the try-block and # getting the temporary file. Not much we can do. pass return rssString def test_rssFeedString(self): fg = self.fg rssString = fg.rss_str(xml_declaration=False) self.checkRssString(rssString) def test_rssStringAndFileAreEqual(self): rss_string = self.fg.rss_str() rss_file = self.getRssFeedFileContents(self.fg) self.assertEqual(rss_string, rss_file) def checkRssString(self, rssString): feed = etree.fromstring(rssString) nsRss = self.nsContent nsAtom = "http://www.w3.org/2005/Atom" channel = feed.find("channel") assert channel != None assert channel.find("title").text == self.name assert channel.find("description").text == self.description assert channel.find("{%s}subtitle" % self.nsItunes).text == \ self.subtitle assert channel.find("link").text == self.website assert channel.find("lastBuildDate").text != None assert channel.find("language").text == self.language assert channel.find("docs").text == "http://www.rssboard.org/rss-specification" assert self.programname in channel.find("generator").text assert channel.find("cloud").get('domain') == self.cloudDomain assert channel.find("cloud").get('port') == self.cloudPort assert channel.find("cloud").get('path') == self.cloudPath assert channel.find("cloud").get('registerProcedure') == self.cloudRegisterProcedure assert channel.find("cloud").get('protocol') == self.cloudProtocol assert channel.find("copyright").text == self.copyright assert 
channel.find("docs").text == self.docs assert self.author.email in channel.find("managingEditor").text assert channel.find("skipDays").find("day").text in self.skip_days assert int(channel.find("skipHours").find("hour").text) in self.skip_hours assert self.web_master.email in channel.find("webMaster").text links = channel.findall("{%s}link" % nsAtom) selflinks = [link for link in links if link.get('rel') == 'self'] hublinks = [link for link in links if link.get('rel') == 'hub'] assert selflinks, "No <atom:link rel='self'> element found" selflink = selflinks[0] assert selflink.get('href') == self.feed_url assert selflink.get('type') == 'application/rss+xml' assert hublinks, "No <atom:link rel='hub'> element found" hublink = hublinks[0] assert hublink.get('href') == self.pubsubhubbub assert hublink.get('type') is None assert channel.find("{%s}image" % self.nsItunes).get('href') == \ self.image owner = channel.find("{%s}owner" % self.nsItunes) assert owner.find("{%s}name" % self.nsItunes).text == self.owner.name assert owner.find("{%s}email" % self.nsItunes).text == self.owner.email assert channel.find("{%s}complete" % self.nsItunes).text.lower() == \ "yes" assert channel.find("{%s}new-feed-url" % self.nsItunes).text == \ self.new_feed_url def test_feedUrlValidation(self): self.assertRaises(ValueError, setattr, self.fg, "feed_url", "example.com/feed.rss") def test_generator(self): software_name = "My Awesome Software" software_version = (1, 0) software_url = "http://example.com/awesomesoft/" # Using set_generator, text includes python-podgen self.fg.set_generator(software_name) rss = self.fg._create_rss() generator = rss.find("channel").find("generator").text assert software_name in generator assert self.programname in generator # Using set_generator, text excludes python-podgen self.fg.set_generator(software_name, exclude_podgen=True) generator = self.fg._create_rss().find("channel").find("generator").text assert software_name in generator assert self.programname not 
in generator # Using set_generator, text includes name, version and url self.fg.set_generator(software_name, software_version, software_url) generator = self.fg._create_rss().find("channel").find("generator").text assert software_name in generator assert str(software_version[0]) in generator assert str(software_version[1]) in generator assert software_url in generator # Using generator directly, text excludes python-podgen self.fg.generator = software_name generator = self.fg._create_rss().find("channel").find("generator").text assert software_name in generator assert self.programname not in generator def test_str(self): assert str(self.fg) == self.fg.rss_str( minimize=False, encoding="UTF-8", xml_declaration=True ) def test_updated(self): date = datetime.datetime(2016, 1, 1, 0, 10, tzinfo=dateutil.tz.tzutc()) def getLastBuildDateElement(fg): return fg._create_rss().find("channel").find("lastBuildDate") # Test that it has a default assert getLastBuildDateElement(self.fg) is not None # Test that it respects my custom value self.fg.last_updated = date lastBuildDate = getLastBuildDateElement(self.fg) assert lastBuildDate is not None assert dateutil.parser.parse(lastBuildDate.text) == date # Test that it is left out when set to False self.fg.last_updated = False lastBuildDate = getLastBuildDateElement(self.fg) assert lastBuildDate is None def test_AuthorEmail(self): # Just email - so use managingEditor, not dc:creator or itunes:author # This is per the RSS best practices, see the section about dc:creator self.fg.authors = [Person(None, "*****@*****.**")] channel = self.fg._create_rss().find("channel") # managingEditor uses email? assert channel.find("managingEditor").text == self.fg.authors[0].email # No dc:creator? assert channel.find("{%s}creator" % self.nsDc) is None # No itunes:author? 
assert channel.find("{%s}author" % self.nsItunes) is None def test_AuthorName(self): # Just name - use dc:creator and itunes:author, not managingEditor self.fg.authors = [Person("Just a. Name")] channel = self.fg._create_rss().find("channel") # No managingEditor? assert channel.find("managingEditor") is None # dc:creator equals name? assert channel.find("{%s}creator" % self.nsDc).text == \ self.fg.authors[0].name # itunes:author equals name? assert channel.find("{%s}author" % self.nsItunes).text == \ self.fg.authors[0].name def test_AuthorNameAndEmail(self): # Both name and email - use managingEditor and itunes:author, # not dc:creator self.fg.authors = [Person("Both a name", "*****@*****.**")] channel = self.fg._create_rss().find("channel") # Does managingEditor follow the pattern "email (name)"? self.assertEqual(self.fg.authors[0].email + " (" + self.fg.authors[0].name + ")", channel.find("managingEditor").text) # No dc:creator? assert channel.find("{%s}creator" % self.nsDc) is None # itunes:author uses name only? assert channel.find("{%s}author" % self.nsItunes).text == \ self.fg.authors[0].name def test_multipleAuthors(self): # Multiple authors - use itunes:author and dc:creator, not # managingEditor. 
person1 = Person("Multiple", "*****@*****.**") person2 = Person("Are", "*****@*****.**") self.fg.authors = [person1, person2] channel = self.fg._create_rss().find("channel") # Test dc:creator author_elements = \ channel.findall("{%s}creator" % self.nsDc) author_texts = [e.text for e in author_elements] assert len(author_texts) == 2 assert person1.name in author_texts[0] assert person1.email in author_texts[0] assert person2.name in author_texts[1] assert person2.email in author_texts[1] # Test itunes:author itunes_author = channel.find("{%s}author" % self.nsItunes) assert itunes_author is not None itunes_author_text = itunes_author.text assert person1.name in itunes_author_text assert person1.email not in itunes_author_text assert person2.name in itunes_author_text assert person2.email not in itunes_author_text # Test that managingEditor is not used assert channel.find("managingEditor") is None def test_authorsInvalidValue(self): self.assertRaises(TypeError, self.do_authorsInvalidValue) def do_authorsInvalidValue(self): self.fg.authors = Person("Opsie", "*****@*****.**") def test_webMaster(self): self.fg.web_master = Person(None, "*****@*****.**") channel = self.fg._create_rss().find("channel") assert channel.find("webMaster").text == self.fg.web_master.email self.assertRaises(ValueError, setattr, self.fg, "web_master", Person("Mr. No Email Address")) self.fg.web_master = Person("Both a name", "*****@*****.**") channel = self.fg._create_rss().find("channel") # Does webMaster follow the pattern "email (name)"? 
self.assertEqual(self.fg.web_master.email + " (" + self.fg.web_master.name + ")", channel.find("webMaster").text) def test_categoryWithoutSubcategory(self): c = Category("Arts") self.fg.category = c channel = self.fg._create_rss().find("channel") itunes_category = channel.find("{%s}category" % self.nsItunes) assert itunes_category is not None self.assertEqual(itunes_category.get("text"), c.category) assert itunes_category.find("{%s}category" % self.nsItunes) is None def test_categoryWithSubcategory(self): c = Category("Arts", "Food") self.fg.category = c channel = self.fg._create_rss().find("channel") itunes_category = channel.find("{%s}category" % self.nsItunes) assert itunes_category is not None itunes_subcategory = itunes_category\ .find("{%s}category" % self.nsItunes) assert itunes_subcategory is not None self.assertEqual(itunes_subcategory.get("text"), c.subcategory) def test_categoryChecks(self): c = ("Arts", "Food") self.assertRaises(TypeError, setattr, self.fg, "category", c) def test_explicitIsExplicit(self): self.fg.explicit = True channel = self.fg._create_rss().find("channel") itunes_explicit = channel.find("{%s}explicit" % self.nsItunes) assert itunes_explicit is not None assert itunes_explicit.text.lower() in ("yes", "explicit", "true"),\ "itunes:explicit was %s, expected yes, explicit or true" \ % itunes_explicit.text def test_explicitIsClean(self): self.fg.explicit = False channel = self.fg._create_rss().find("channel") itunes_explicit = channel.find("{%s}explicit" % self.nsItunes) assert itunes_explicit is not None assert itunes_explicit.text.lower() in ("no", "clean", "false"),\ "itunes:explicit was %s, expected no, clean or false" \ % itunes_explicit.text def test_mandatoryValues(self): # Try to create a Podcast once for each mandatory property. # On each iteration, exactly one of the properties is not set. # Therefore, an exception should be thrown on each iteration. 
mandatory_properties = set([ "description", "title", "link", "explicit", ]) for test_property in mandatory_properties: fg = Podcast() if test_property != "description": fg.description = self.description if test_property != "title": fg.name = self.name if test_property != "link": fg.website = self.website if test_property != "explicit": fg.explicit = self.explicit try: self.assertRaises(ValueError, fg._create_rss) except AssertionError as e: raise_from(AssertionError( "The test failed for %s" % test_property), e) def test_withholdFromItunesOffByDefault(self): assert not self.fg.withhold_from_itunes def test_withholdFromItunes(self): self.fg.withhold_from_itunes = True itunes_block = self.fg._create_rss().find("channel")\ .find("{%s}block" % self.nsItunes) assert itunes_block is not None self.assertEqual(itunes_block.text.lower(), "yes") self.fg.withhold_from_itunes = False itunes_block = self.fg._create_rss().find("channel")\ .find("{%s}block" % self.nsItunes) assert itunes_block is None def test_modifyingSkipDaysAfterwards(self): self.fg.skip_days.add("Unrecognized day") self.assertRaises(ValueError, self.fg.rss_str) self.fg.skip_days.remove("Unrecognized day") self.fg.rss_str() # Now it works def test_modifyingSkipHoursAfterwards(self): self.fg.skip_hours.add(26) self.assertRaises(ValueError, self.fg.rss_str) self.fg.skip_hours.remove(26) self.fg.rss_str() # Now it works # Tests for xslt def test_xslt_str(self): def use_str(**kwargs): return self.fg.rss_str(**kwargs) self.help_test_xslt_using(use_str) def test_xslt_file(self): def use_file(**kwargs): return self.getRssFeedFileContents(self.fg, **kwargs) self.help_test_xslt_using(use_file) def help_test_xslt_using(self, generated_feed): """Run tests for xslt, generating the feed str using the given function. 
""" xslt_path = "http://example.com/mystylesheet.xsl" xslt_pi = "<?xml-stylesheet" # No xslt when set to None self.fg.xslt = None assert xslt_pi not in generated_feed() assert xslt_pi not in generated_feed(minimize=True) assert xslt_pi not in generated_feed(xml_declaration=False) self.fg.xslt = xslt_path # Now we have the stylesheet in there assert xslt_pi in generated_feed() assert xslt_pi in generated_feed(minimize=True) assert xslt_pi in generated_feed(xml_declaration=False) assert xslt_path in generated_feed() assert xslt_path in generated_feed(minimize=True) assert xslt_path in generated_feed(xml_declaration=False) def test_imageWarningNoExt(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") self.assertEqual(len(w), 0) # Set image to a URL without proper file extension no_ext = "http://static.example.com/images/logo" self.fg.image = no_ext # Did we get a warning? self.assertEqual(1, len(w)) assert issubclass(w.pop().category, NotSupportedByItunesWarning) # Was the image set? self.assertEqual(no_ext, self.fg.image) def test_imageWarningBadExt(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # Set image to a URL with an unsupported file extension bad_ext = "http://static.example.com/images/logo.gif" self.fg.image = bad_ext # Did we get a warning? self.assertEqual(1, len(w)) # Was it of the correct type? assert issubclass(w.pop().category, NotSupportedByItunesWarning) # Was the image still set? self.assertEqual(bad_ext, self.fg.image) def test_imageNoWarningWithGoodExt(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # Set image to a URL with a supported file extension extensions = ["jpg", "png", "jpeg"] for extension in extensions: good_ext = "http://static.example.com/images/logo." + extension self.fg.image = good_ext # Did we get no warning? self.assertEqual(0, len(w), "Extension %s raised warnings (%s)" % (extension, w)) # Was the image set? 
self.assertEqual(good_ext, self.fg.image)
if next((x for x in session_items if x['CID'] == cid), None): print(f'WARNING: duplicate CID {cid} for new item: {title}') # write the new sessions json file updated_session_items = new_items + session_items for item in updated_session_items: item['link'] = f'{ipfs_prefix}{item["CID"]}{ipfs_suffix}' with open(sessions_filename, 'w') as outfile: json.dump(updated_session_items, outfile, indent=2) print('>>> wrote fresh sessions.json file') # write the new rss file p = Podcast() p.name = "The Objectivism Seminar" p.category = Category("Society & Culture", "Philosophy") p.language = "en-US" p.explicit = True p.description = ( "A weekly online conference call to systematically study " + "the philosophy of Objectivism via the works of prominent Rand scholars.") p.website = "https://www.ObjectivismSeminar.com" p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg" p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss" p.authors = [Person("Greg Perkins, Host", "*****@*****.**")] p.owner = Person("Greg Perkins", "*****@*****.**") p.episodes += [
class Ximalaya():
    """Turn a Ximalaya album into a podcast RSS file.

    The album page is scraped for the channel metadata, the track list comes
    from the album JSON endpoint, and every track page is scraped for its
    summary and publication time.
    """

    def __init__(self, album_id):
        """Store the album id and derive the URLs and request headers from it."""
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Build ``self.podcast`` from the album and write ``ximalaya/<id>.rss``.

        A track that fails to download or parse is reported on stdout and
        processing continues with the next track.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the channel
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'album-intro').get_text()
        self.podcast.language = 'cn'
        # Drop everything from the first '!' of the cover URL onwards
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                page_info = requests.get('http://www.ximalaya.com/%s' %
                                         each['trackUrl'],
                                         headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")
                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                # Single-argument print(...) behaves the same on Python 2 and
                # Python 3; the former print statement broke Python 3.
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                # .gif/.bmp covers are replaced by the album cover
                if image.endswith(('.gif', '.bmp')):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                intro = soup_info.find('article', 'intro')
                if intro:
                    # Round-trip through GBK to drop characters GBK cannot
                    # represent
                    episode.summary = intro.get_text().encode(
                        'gbk', 'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                episode.media = Media(each['src'], each['duration'])
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # Time conversion
    @staticmethod
    def reduction_time(created_date):
        """Parse ``"%Y-%m-%d %H:%M:%S"`` text into a UTC-tagged datetime.

        Seconds are discarded; the parsed wall-clock time is labelled as UTC
        without any conversion.
        """
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return timestamp.replace(second=0, microsecond=0, tzinfo=pytz.utc)
content = requests.get(base_url).content soup = BeautifulSoup(content, features="lxml") urls_to_follow = [] for anchor in soup.select("#listProgramsContent a")[:10]: urls_to_follow.append(base_href + anchor.get("href")) p = Podcast( name="Alta Tensão", description="Alta Tensão com António Freitas", image="https://cdn-images.rtp.pt/EPG/radio/imagens/1068_10159_53970.jpg", website=base_url, explicit=True, ) episodes = [] for url in urls_to_follow: content = requests.get(url).content soup = BeautifulSoup(content, features="lxml") res = re.search(b'file : "(.+?)",\\n', content) title = soup.select("b.vod-title")[0].text date = soup.select(".vod-data p span.episode-date")[0].text media_url = res.groups()[0].decode() head = requests.head(url) if '\n' in title:
class Ximalaya():
    """Turn a Ximalaya album into a podcast RSS file using the JSON endpoints.

    Channel metadata comes from the album-info endpoint, tracks are fetched
    page by page from the album-list endpoint, and each track's summary and
    creation time come from the mobile track-info endpoint.
    """

    def __init__(self, album_id):
        """Store the album id and prepare URL templates and request headers."""
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    @staticmethod
    def _page_count(track_total, page_size):
        """Return how many pages of ``page_size`` hold ``track_total`` tracks.

        Integer ceiling division gives the same answer on Python 2 and 3,
        unlike ``math.ceil(a / b) + 1``, which asked for a surplus page under
        true division.
        """
        return (track_total + page_size - 1) // page_size

    def album(self):
        """Build ``self.podcast`` from the album and write ``ximalaya/<id>.rss``.

        Nothing is written when the album-info endpoint does not answer with
        ``ret == 200``. A track that fails to download or parse is reported on
        stdout (with a traceback) and processing continues with the next one.
        """
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            # Without album data there is no channel to build a feed from
            print('异常:', 'album info ret = %s' % album_info_content['ret'])
            return

        album_info_data = album_info_content['data']
        main_info = album_info_data['mainInfo']

        # Initialise the channel
        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        # Fall back to the album title when there is no introduction
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # The cover URL carries no scheme; drop everything from the first '!'
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # assumes trackTotalCount is an integer -- TODO confirm against the API
        page_total = self._page_count(
            album_info_data['tracksInfo']['trackTotalCount'], self.page_size)
        for page_num in range(1, page_total + 1):
            album_list = requests.get(self.album_list_url.format(
                self.album_id, page_num, self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            # NOTE(review): count is the size of this page only; if 'index'
            # is album-wide, positions on later pages may be off -- confirm.
            count = len(tracks)
            for each in tracks:
                try:
                    detail = requests.get(self.detail_url.format(
                        each['trackId']), headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))
                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])
                    image = each['trackCoverPath'].split('!')[0]
                    # Only .png/.jpg covers are used; anything else falls
                    # back to the album cover
                    if image.endswith(('.png', '.jpg')):
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image
                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace(
                            '\r\n', '')
                    else:
                        episode.summary = each['trackName']
                    episode.link = 'http://www.ximalaya.com%s' % each[
                        'albumUrl']
                    episode.authors = [
                        Person("forecho", '*****@*****.**')
                    ]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    episode.media = Media(each['src'], each['duration'])
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and carry on
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    # Time conversion; the argument is a millisecond timestamp
    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a UTC datetime.

        Seconds are discarded. The conversion is done in UTC so the result
        does not depend on the time zone of the machine running the script;
        previously the local wall-clock time was labelled as UTC.
        """
        timestamp = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return timestamp.replace(second=0, microsecond=0)
def genero_feed(episodesList):
    """Write the NECST Tech Time RSS feed for the given episodes.

    Each entry of ``episodesList`` is indexed as: [1] title, [2] link,
    [3] media URL, [4] publication date. Nothing is written when the list is
    empty or None. The feed is saved to the module-level ``rssfile`` path.
    """
    if not episodesList:
        return

    # Create a new podcast
    feed = Podcast()
    feed.name = "NECST Tech Time"
    feed.description = (
        "The NECSTLab (Novel, Emerging Computing System Technologies "
        "Laboratory) is a laboratory inside DEIB department of Politecnico "
        "di Milano, where there are a number of different research lines on "
        "advanced topics in computing systems: from architectural "
        "characteristics, to hardware-software codesign methodologies, to "
        "security and dependability issues of complex system architectures "
        "(scaling from mobile devices to large virtualized datacenters)."
    )
    feed.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/necst_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    feed.copyright = "Poli Radio"
    feed.language = "it-IT"

    for details in episodesList:
        item = Episode()
        # Non-ASCII characters are dropped from the title and the link
        item.title = details[1].encode("ascii", "ignore")
        item.link = details[2].encode("ascii", "ignore")

        # The size is a fixed estimate based on the episodes analysed so far
        item.media = Media(details[3], 30000000,
                           type="audio/x-m4a", duration=None)
        item.publication_date = details[4]

        feed.episodes.append(item)

    # Write the feed to the configured file
    feed.rss_file(rssfile, minimize=False)
def generate_podcast(self, feed_name: str) -> str:
    """
    Create podcast XML based on the files found in podcastDir.

    Taken from https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html

    Every ``*.mp3`` below ``<search_dir>/<feed_name>`` becomes an episode.
    Episode details come from a ``.json`` file next to the audio file when one
    exists; otherwise they are derived from the file itself and saved to that
    ``.json`` file so they can be edited later.

    :param self: PodcastService class
    :param feed_name: name of the feed and the sub-directory for files
    :return: string of the podcast
    """
    # Initialize the feed
    p = Podcast()

    # Required fields
    p.name = f'{feed_name} Archive'
    p.description = 'Stuff to listen to later'
    p.website = self.base_url
    p.complete = False

    # Optional
    p.language = 'en-US'
    p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
    p.explicit = False
    p.authors.append(Person("Anthology"))

    for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
        filepath = str(path)
        episode = p.add_episode()

        # Attempt to load saved metadata. with_suffix() swaps only the file
        # extension, whereas str.replace('.mp3', ...) also rewrote any
        # '.mp3' appearing earlier in the path.
        metadata_file_name = str(path.with_suffix('.json'))
        try:
            with open(metadata_file_name) as metadata_file:
                metadata = json.load(metadata_file)
        except FileNotFoundError:
            metadata = {}
        except JSONDecodeError:
            metadata = {}
            self.logger.error(f'Failed to read {metadata_file_name}')

        # Build the episode based on either the saved metadata or the file
        # details. path.stem is the file name without its extension;
        # rstrip('.mp3') stripped any trailing '.', 'm', 'p' or '3'
        # characters and so mangled names such as 'mom.mp3'.
        episode.title = metadata.get('title', path.stem)
        episode.summary = metadata.get('summary', htmlencode('Some Summary'))
        if 'link' in metadata:
            episode.link = metadata.get('link')
        if 'authors' in metadata:
            episode.authors = [
                Person(author) for author in metadata.get('authors')
            ]
        # Fall back to the file's modification time
        episode.publication_date = \
            isoparse(metadata.get('publication_date')) if 'publication_date' in metadata \
            else datetime.fromtimestamp(os.path.getmtime(filepath), tz=pytz.utc)

        # The public URL mirrors the path below search_dir. relative_to()
        # removes exactly that prefix; lstrip(search_dir) removed any leading
        # characters that occur in search_dir and could eat into the feed
        # directory name.
        media_path = path.relative_to(self.search_dir).as_posix()
        episode.media = Media(
            f'{p.website}/{media_path}'.replace(' ', '+'),
            os.path.getsize(filepath))
        episode.media.populate_duration_from(filepath)

        if "image" in metadata:
            episode.image = metadata.get('image')
        else:
            # Use the first image that shares the audio file's base name
            for ext in ['.jpg', '.png']:
                image_path = path.with_suffix(ext)
                if image_path.is_file():
                    image_url_path = image_path.relative_to(
                        self.search_dir).as_posix()
                    episode.image = f'{p.website}/{image_url_path}'.replace(
                        ' ', '+')
                    break

        # Save the metadata for future editing
        if not os.path.exists(metadata_file_name):
            metadata = {
                'title': episode.title,
                'summary': episode.summary,
                'publication_date': episode.publication_date,
                'authors': episode.authors
            }
            with open(metadata_file_name, 'w') as outFile:
                # default=str stringifies values json cannot encode natively
                json.dump(metadata, outFile, indent=2, default=str)

    return p.rss_str()
def main():
    """Create an example podcast and print it or save it to a file.

    Reads the command line from ``sys.argv``. Exactly one argument is
    expected and it must end with ``rss``:

    * ``rss`` -- print the generated RSS via ``print_enc``.
    * ``<file>.rss`` -- write the generated RSS to that file.

    Any other invocation prints a usage message and terminates the
    interpreter by raising ``SystemExit`` (no explicit status).
    """
    # There must be exactly one argument, and it must end with rss
    if len(sys.argv) != 2 or not sys.argv[1].endswith('rss'):
        # Invalid usage, print help message
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc('Usage: %s ( <file>.rss | rss )' % 'python -m podgen')
        print_enc('')
        print_enc(' rss -- Generate RSS test output and print it to stdout.')
        print_enc(' <file>.rss -- Generate RSS test feed and write it to file.rss.')
        print_enc('')
        # sys.exit() instead of the bare exit() helper: the latter is only
        # injected by the site module and is not guaranteed to exist.
        sys.exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode

    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    # A single example episode
    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    # Adjacent literals are concatenated; the text itself is unchanged.
    e1.summary = htmlencode(
        'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen '
        'aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista '
        'mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam '
        'domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas '
        'occultetur? \n'
        'Cum id fugiunt, re eadem defendunt, quae Peripatetici, verba <3.')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10,
                                            tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3",
                     454599964,
                     duration=datetime.timedelta(hours=1, minutes=32,
                                                 seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    else:
        # Write to file. The validation above guarantees that arg ends with
        # 'rss', so anything other than the bare word is a file name.
        p.rss_file(arg, minimize=True)