def genero_feed(episodesList):
    """Build the NECST Tech Time RSS feed from scraped episode tuples.

    Each item of ``episodesList`` is indexed as: [1]=title, [2]=page link,
    [3]=media URL, [4]=publication date. The feed is written to the
    module-level ``rssfile`` path.
    """
    if not episodesList:
        return
    # Creo un nuovo podcast
    p = Podcast()
    p.name = "NECST Tech Time"
    p.description = "The NECSTLab (Novel, Emerging Computing System Technologies Laboratory) is a laboratory inside DEIB department of Politecnico di Milano, where there are a number of different research lines on advanced topics in computing systems: from architectural characteristics, to hardware-software codesign methodologies, to security and dependability issues of complex system architectures (scaling from mobile devices to large virtualized datacenters)."
    p.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    p.explicit = True
    p.image = "https://rss.draghetti.it/necst_image.jpg"
    p.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    p.copyright = "Poli Radio"
    p.language = "it-IT"

    for episodedetails in episodesList:
        episode = Episode()
        # BUG FIX: str.encode() returns bytes on Python 3; strip non-ASCII
        # characters but keep the values as str.
        episode.title = episodedetails[1].encode("ascii", "ignore").decode("ascii")
        episode.link = episodedetails[2].encode("ascii", "ignore").decode("ascii")
        # The size is a statistical estimate based on the analysed episodes
        episode.media = Media(episodedetails[3], 30000000, type="audio/x-m4a", duration=None)
        episode.publication_date = episodedetails[4]
        p.episodes.append(episode)

    # NOTE(review): rssfile is expected to be defined at module level -- confirm.
    p.rss_file(rssfile, minimize=False)
def genero_feed(episodesList):
    """Build the unofficial NECST Tech Time RSS feed from scraped episode tuples.

    Each item of ``episodesList`` is indexed as: [1]=title, [2]=page link,
    [3]=media URL, [4]=publication date. The feed is written to the
    module-level ``rssfile`` path.
    """
    if not episodesList:
        return
    # Creo un nuovo podcast
    p = Podcast()
    p.name = "NECST Tech Time"
    p.description = "Feed Podcast non ufficiale di NECST Tech Time - Powered By Andrea Draghetti"
    p.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    p.explicit = True
    p.image = "https://rss.draghetti.it/necst_image.jpg"
    p.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    p.copyright = "Poli Radio"
    p.language = "it-IT"

    for episodedetails in episodesList:
        episode = Episode()
        # BUG FIX: str.encode() returns bytes on Python 3; strip non-ASCII
        # characters but keep the values as str.
        episode.title = episodedetails[1].encode("ascii", "ignore").decode("ascii")
        episode.link = episodedetails[2].encode("ascii", "ignore").decode("ascii")
        # The size is a statistical estimate based on the analysed episodes
        episode.media = Media(episodedetails[3], 30000000, type="audio/x-m4a", duration=None)
        episode.publication_date = episodedetails[4]
        p.episodes.append(episode)

    # NOTE(review): rssfile is expected to be defined at module level -- confirm.
    p.rss_file(rssfile, minimize=False)
def main():
    """Load thebugle.json and print an RSS feed for the archived episodes."""
    with open('thebugle.json') as source:
        archive = json.load(source)

    feed = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for entry in archive:
        item = feed.add_episode(Episode(title=f"{entry['id']}: {entry['title']}"))
        item.media = Media.create_from_server_response(f"{MEDIA_BASE_URL}/{entry['file']}")
        item.media.fetch_duration()
        # Dates are stored as 'YYYY-MM-DD'; publish at midnight UTC.
        parts = entry['date'].split('-')
        item.publication_date = datetime(int(parts[0]), int(parts[1]), int(parts[2]),
                                         0, 0, 0, tzinfo=pytz.utc)

    print(feed.rss_str())
def album(self):
    """Fetch album metadata from the Qingting API and write the RSS feed file."""
    info = json.loads(requests.get(self.album_info_api).content)
    track_list = json.loads(requests.get(self.album_list_api).content)

    self.podcast = feed = Podcast()
    feed.name = info['data']['title']
    feed.authors.append(Person("Powered by maijver", '*****@*****.**'))
    feed.website = self.url
    feed.copyright = 'cc-by'
    feed.description = info['data']['description']
    feed.language = 'cn'
    # Drop the '!200' resize suffix to get the full-size thumbnail.
    feed.image = info['data']['thumbs']['small_thumb'].replace('!200', '')
    feed.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
    feed.category = Category('Technology', 'Podcasting')
    feed.explicit = False
    feed.complete = False
    feed.owner = Person("maijver", '*****@*****.**')

    for track in track_list['data']:
        episode = feed.add_episode()
        episode.id = str(track['id'])
        episode.title = track['title']
        print(feed.name + '=====' + track['title'])
        episode.image = info['data']['thumbs']['small_thumb'].replace('!200', '')
        episode.summary = track['title']
        episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, track['id'])
        episode.authors = [Person("forecho", '*****@*****.**')]
        episode.publication_date = self.reduction_time(track['update_time'])
        episode.media = Media("http://od.qingting.fm/{}".format(track['mediainfo']['bitrates_url'][0]['file_path']),
                              track['duration'])

    feed.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)
def album(self):
    """Scrape a Ximalaya album page and write the podcast RSS feed file."""
    page = requests.get(self.url, headers=self.header)
    soup = BeautifulSoup(page.content, "lxml")

    # Initialise the feed from the album page header.
    self.podcast = feed = Podcast()
    feed.name = soup.find('div', 'detailContent_title').get_text()
    feed.authors.append(Person("Powered by forecho", '*****@*****.**'))
    feed.website = self.url
    feed.copyright = 'cc-by'
    feed.description = soup.find('div', 'mid_intro').get_text()
    feed.language = 'cn'
    # Drop the '!...' resize suffix from the cover image URL.
    feed.image = soup.find('a', 'albumface180').find('img').get('src').split('!')[0]
    feed.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
    feed.category = Category('Technology', 'Podcasting')
    feed.explicit = False
    feed.complete = False
    feed.owner = Person("forecho", '*****@*****.**')

    # All episode ids are stored in a single attribute on the page;
    # self.detail() adds one Episode per id.
    sound_ids = soup.find('div', class_='personal_body').get('sound_ids').split(',')
    for sound_id in sound_ids:
        published = soup.find('li', sound_id=sound_id).find('div', class_='operate').get_text().strip()
        self.detail(sound_id, published)

    # Write the feed to disk.
    feed.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
def generate_podcast_xml(base, books):
    """Generate skeleton/rss.xml for a set of audiobook files.

    :param base: public base URL prefix used to build each enclosure URL
    :param books: iterable of file names (with a 4-character extension)
    """
    # Imported locally so the module can be loaded without podgen installed.
    from podgen import Podcast, Episode, Media

    p = Podcast()
    p.name = "AeonNeo's Audiobooks"
    p.description = "Description"
    p.website = "www.yangvincent.com"
    p.explicit = False

    # Create one episode per book file.
    for book_name in books:
        ep = Episode()
        ep.title = book_name[:-4]  # drop the 4-char file extension
        full_path = base + '/files/' + book_name
        dev_path = 'files/' + book_name
        try:
            book_size = os.path.getsize(dev_path)
        except OSError as e:
            # Best effort: keep going with an unknown size.
            print(e)
            book_size = 0
        # BUG FIX: 'audio/mp4a' is not a registered MIME type; MPEG-4
        # audio is 'audio/mp4' (RFC 4337).
        ep.media = Media(full_path, type='audio/mp4', size=book_size)
        p.episodes.append(ep)

    # Generate rss
    p.rss_file('skeleton/rss.xml', minimize=True)
def test_constructor(self):
    # Overwrite fg from setup with one built entirely by the constructor,
    # passing every supported keyword argument at once.
    attributes = dict(
        name=self.name,
        website=self.website,
        description=self.description,
        subtitle=self.subtitle,
        language=self.language,
        cloud=(self.cloudDomain, self.cloudPort, self.cloudPath,
               self.cloudRegisterProcedure, self.cloudProtocol),
        pubsubhubbub=self.pubsubhubbub,
        copyright=self.copyright,
        authors=[self.author],
        skip_days=self.skip_days,
        skip_hours=self.skip_hours,
        web_master=self.web_master,
        feed_url=self.feed_url,
        explicit=self.explicit,
        image=self.image,
        owner=self.owner,
        complete=self.complete,
        new_feed_url=self.new_feed_url,
        xslt=self.xslt,
    )
    self.fg = Podcast(**attributes)

    # The constructor must have set every field.
    self.test_baseFeed()
def genero_feed(puntateList):
    """Build the 'Il Ruggito del Coniglio' RSS feed from scraped episode tuples.

    Each item of ``puntateList`` is indexed as: [0]=title, [1]=page link,
    [2]=date 'dd/mm/yyyy' (may be empty), [3]=media URL, [4]=media size.
    The feed is written to the module-level ``rssfile`` path.
    """
    if not puntateList:
        return
    # Creo un nuovo podcast
    p = Podcast()
    p.name = "Il Ruggito del Coniglio"
    p.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
    p.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
    p.explicit = True
    p.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
    p.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
    p.copyright = "Rai Radio 2"
    p.language = "it-IT"

    for puntata in puntateList:
        episode = Episode()
        # BUG FIX: str.encode() returns bytes on Python 3; strip non-ASCII
        # characters but keep the title a str.
        episode.title = puntata[0].encode("ascii", "ignore").decode("ascii")
        episode.link = puntata[1]
        # La dimensione del file e approssimativa
        episode.media = Media(puntata[3], puntata[4])
        if puntata[2]:
            # Date is 'dd/mm/yyyy'; publish at 10:00 UTC.
            parts = puntata[2].split("/")
            episode.publication_date = datetime.datetime(int(parts[2]), int(parts[1]), int(parts[0]),
                                                         10, 00, tzinfo=pytz.utc)
        else:
            episode.publication_date = pytz.utc.localize(datetime.datetime.utcnow())
        p.episodes.append(episode)

    # NOTE(review): rssfile is expected to be defined at module level -- confirm.
    p.rss_file(rssfile, minimize=False)
def test_mandatoryValues(self):
    # Build a Podcast once per mandatory property, leaving exactly that
    # property unset each time; rendering must raise ValueError every time.
    mandatory_properties = {"description", "title", "link", "explicit"}

    for missing in mandatory_properties:
        fg = Podcast()
        if missing != "description":
            fg.description = self.description
        if missing != "title":
            fg.name = self.name
        if missing != "link":
            fg.website = self.website
        if missing != "explicit":
            fg.explicit = self.explicit

        try:
            self.assertRaises(ValueError, fg._create_rss)
        except AssertionError as e:
            # Re-raise with the offending property named, keeping the chain.
            raise_from(AssertionError("The test failed for %s" % missing), e)
def genero_feed(episodesList):
    """Build the unofficial 'All You Can Dance' RSS feed from scraped tuples.

    Each item of ``episodesList`` is indexed as: [1]=title, [2]=page link,
    [3]=media URL, [4]=publication date. The feed is written to the
    module-level ``rssfile`` path.
    """
    if not episodesList:
        return
    # Creo un nuovo podcast
    p = Podcast()
    p.name = "All You Can Dance by Dino Brawn"
    p.description = "Feed Podcast non ufficiale di All You Can Dance by Dino Brown - Powered By Andrea Draghetti"
    p.website = "https://onedance.fm/"
    p.explicit = True
    p.image = "https://rss.draghetti.it/allyoucandance_image.jpg"
    p.feed_url = "https://rss.draghetti.it/allyoucandance.xml"
    p.copyright = "One Dance"
    p.language = "it-IT"

    for episodedetails in episodesList:
        episode = Episode()
        # BUG FIX: str.encode() returns bytes on Python 3; strip non-ASCII
        # characters but keep the values as str.
        episode.title = episodedetails[1].encode("ascii", "ignore").decode("ascii")
        episode.link = episodedetails[2].encode("ascii", "ignore").decode("ascii")
        # The size is a statistical estimate based on the analysed episodes
        episode.media = Media(episodedetails[3], 30000000, type="audio/x-m4a", duration=None)
        episode.publication_date = episodedetails[4]
        p.episodes.append(episode)

    # NOTE(review): rssfile is expected to be defined at module level -- confirm.
    p.rss_file(rssfile, minimize=False)
def main():
    """Create an example podcast and print it or save it to a file."""
    # There must be exactly one argument, and it must end with "rss".
    if len(sys.argv) != 2 or not sys.argv[1].endswith('rss'):
        # Invalid usage: show the help message.
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc ('Usage: %s ( <file>.rss | rss )' % \
                'python -m podgen')
        print_enc ('')
        print_enc (' rss -- Generate RSS test output and print it to stdout.')
        print_enc (' <file>.rss -- Generate RSS test teed and write it to file.rss.')
        print_enc ('')
        exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    from podgen import Podcast, Person, Media, Category, htmlencode

    # Build the example feed.
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    # ...and a single example episode.
    episode = p.add_episode()
    episode.id = 'http://lernfunk.de/_MEDIAID_123#1'
    episode.title = 'First Element'
    episode.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas occultetur? 
Cum id fugiunt, re eadem defendunt, quae Peripatetici, verba <3.''')
    episode.link = 'http://example.com'
    episode.authors = [Person('Lars Kiesow', '*****@*****.**')]
    episode.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    episode.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                          duration=datetime.timedelta(hours=1, minutes=32, seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print to stdout
        print_enc(p.rss_str())
    elif arg.endswith('rss'):
        # Write to file
        p.rss_file(arg, minimize=True)
def generate_podcast_xml(podcasts):
    """Render the given list of Episode objects as an RSS feed string."""
    feed = Podcast(
        name=config.PODCAST_NAME,
        description=config.PODCAST_DESCRIPTION,
        website=config.PODCAST_WEBSITE,
        explicit=config.PODCAST_CONTAINS_EXPLICIT_CONTENT,
        # Keep the feed out of the iTunes directory.
        withhold_from_itunes=True,
    )
    feed.episodes = podcasts
    return feed.rss_str()
def album(self):
    """Scrape a Ximalaya album page plus its per-track JSON API and write the RSS feed.

    Reads instance config: album_url, album_list_api, album_id, header.
    Feed is written to ximalaya/<album_id>.rss.
    """
    page = requests.get(self.album_url, headers=self.header)
    soup = BeautifulSoup(page.content, "lxml")
    # Initialise the feed from the album page header.
    self.podcast = Podcast()
    self.podcast.name = soup.find('h1', 'title').get_text()
    self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
    self.podcast.website = self.album_url
    self.podcast.copyright = 'cc-by'
    # Fall back to the album title when there is no intro text.
    if soup.find('div', 'album-intro') and soup.find('div', 'album-intro').get_text():
        self.podcast.description = soup.find('div', 'album-intro').get_text()
    else:
        self.podcast.description = self.podcast.name
    self.podcast.language = 'cn'
    # split('!') drops the image-resize suffix from the cover URL.
    self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
    self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
    self.podcast.category = Category('Technology', 'Podcasting')
    self.podcast.explicit = False
    self.podcast.complete = False
    self.podcast.owner = Person("forecho", '*****@*****.**')

    album_list_content = requests.get(self.album_list_api, headers=self.header).content
    album_list_data = json.loads(album_list_content.decode('utf-8'))
    count = len(album_list_data['data']['tracksAudioPlay'])
    for each in album_list_data['data']['tracksAudioPlay']:
        # One extra JSON request per track for the intro/date details;
        # failures are logged and the track is skipped.
        try:
            detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
            response = requests.get(detail_url, headers=self.header)
            item = json.loads(response.content)
            episode = self.podcast.add_episode()
            episode.id = str(each['index'])
            episode.title = each['trackName']
            print(self.podcast.name + '=====' + each['trackName'])
            image = each['trackCoverPath'].split('!')[0]
            # gif/bmp covers are not usable as episode art; reuse the album cover.
            if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                episode.image = self.podcast.image
            else:
                episode.image = image
            if item['intro']:
                episode.summary = item['intro'].replace('\r\n', '')
            else:
                episode.summary = each['trackName']
            episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(item['time_until_now'],
                                                           item['formatted_created_at'])
            episode.media = Media(each['src'], each['duration'])
            # Newest tracks get the lowest position so feed order matches the site.
            episode.position = count - each['index'] + 1
        except Exception as e:
            print('异常:', e)
            print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
            traceback.print_exc()

    # Write the feed to disk.
    # print self.podcast.rss_str()
    self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
def test_removeEntryByIndex(self):
    # Episodes should be removable straight off the episodes list.
    podcast = Podcast()
    self.feedId = 'http://example.com'
    self.title = 'Some Testfeed'

    entry = podcast.add_episode()
    entry.id = 'http://lernfunk.de/media/654321/1'
    entry.title = 'The Third BaseEpisode'
    assert len(podcast.episodes) == 1

    podcast.episodes.pop(0)
    assert len(podcast.episodes) == 0
def rssfeed(request, programid):
    """
    Builds the rss feed for a program identified by it's id. (int)

    1. Fetches all episodes of the program from the digas db.
    2. gets the programinfo from the app db
    3. Uses podgen to do the actual XML-generation.
    """
    # Only the columns the feed needs are fetched; softdel=0 excludes
    # soft-deleted episodes, and newest episodes come first.
    podcasts = DigasPodcast.objects.using('digas').filter(
        softdel=0,
        program=int(programid)).only('program', 'title', 'remark', 'author',
                                     'createdate', 'broadcastdate', 'filename',
                                     'filesize', 'duration',
                                     'softdel').order_by('-createdate')
    programinfo = ProgramInfo.objects.get(programid=int(programid))
    # loading globalsettings here, and not at the module_level
    # This way django won't explode because of missing
    # constance_config table when we start on scratch
    # or set up in a new environment.
    from .models import globalsettings
    p = Podcast(
        name=programinfo.name,
        subtitle=programinfo.subtitle,
        description=programinfo.description,
        website=feed_url(programid),  # programinfo.website,
        explicit=programinfo.explicit,
        category=Category(programinfo.category),
        authors=[globalsettings.owner],
        language=programinfo.language,
        owner=globalsettings.owner,
        feed_url=feed_url(programid),
        new_feed_url=feed_url(programid),
        image=programinfo.image_url,
    )
    for episode in podcasts:
        # Get pubdate from createdate or broadcastdate
        pubdate = digas2pubdate(episode.createdate, episode.broadcastdate)
        # Add the episode to the list
        p.episodes.append(
            Episode(
                title=episode.title,
                media=Media(mp3url(episode.filename), episode.filesize),
                link=mp3url(episode.filename),  # multifeedreader uses this.
                id=guid(episode.filename),
                summary=episode.remark,
                publication_date=pubdate))
    # send it as unicode -- formatting the Podcast renders the RSS XML.
    rss = u'%s' % p
    return HttpResponse(rss, content_type='application/xml')
def create_podcast(name, desc, website):
    """Return a Podcast preconfigured for the Zara Hat Kay feed.

    :param name: feed title
    :param desc: feed description
    :param website: feed website URL
    """
    podcast = Podcast()
    podcast.name = name
    podcast.description = desc
    podcast.authors = [Person("Dawn News", "*****@*****.**")]
    podcast.website = website
    podcast.image = "http://3.15.38.214/zarahatkay/cover_art.png"
    podcast.language = "en-US"
    podcast.feed_url = "http://3.15.38.214/zarahatkay"
    podcast.category = Category("News & Politics")
    podcast.explicit = False
    return podcast
def main(event, context):
    """AWS Lambda handler: build the 'Sem Servidor' podcast feed from DynamoDB.

    Scans the semservidor-dev table, turns every item into an Episode and
    returns the rendered RSS XML in an API Gateway-style response dict.
    """
    dynamodb = boto3.resource('dynamodb', region_name='sa-east-1')
    table = dynamodb.Table('semservidor-dev')
    podcasts = table.scan()

    author = Person("Evandro Pires da Silva", "*****@*****.**")
    p = Podcast(
        name="Sem Servidor",
        description=
        "Podcast dedicado a arquitetura serverless, com conteúdo de qualidade em português.",
        website="https://semservidor.com.br",
        explicit=False,
        copyright="2020 Evandro Pires da Silva",
        # BUG FIX: Brazilian Portuguese is "pt-BR" (RFC 5646); "pr-BR"
        # is not a valid language tag.
        language="pt-BR",
        authors=[author],
        feed_url=
        "https://3tz8r90j0d.execute-api.sa-east-1.amazonaws.com/dev/podcasts/rss",
        category=Category("Music", "Music History"),
        owner=author,
        image="http://d30gvsirhz3ono.cloudfront.net/logo_semservidor_teste.jpg",
        web_master=Person(None, "*****@*****.**"))

    base_url = "http://d30gvsirhz3ono.cloudfront.net/"
    for item in podcasts['Items']:
        file_path = base_url + item['info']['arquivo']['nome']
        p.episodes += [
            Episode(title=item['info']['episodio'],
                    media=Media(file_path,
                                int(item['info']['arquivo']['tamanho'])),
                    summary=item['info']['descricao'],
                    position=int(item['id']))
        ]
    # Honour the explicit per-episode position values.
    p.apply_episode_order()

    return {
        "statusCode": 200,
        "headers": {
            "content-type": "application/xml"
        },
        "body": p.rss_str()
    }
def generate_rss_from_articles(feed_settings, articles):
    """
    Creates a FeedGenerator feed from a set of feed_entries.

    :param feed_settings: a feed_settings object containing
    :param articles:
    :return:
    """
    feed_author = Person(feed_settings.author['name'], feed_settings.author['email'])

    # Initialize the feed
    podcast = Podcast()
    podcast.name = feed_settings.title
    podcast.authors.append(feed_author)
    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    podcast.description = feed_settings.subtitle
    podcast.summary = feed_settings.subtitle
    podcast.subtitle = feed_settings.subtitle
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False
    podcast.owner = feed_author

    # Track the newest publication date seen, starting from a floor value.
    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    latest = datetime.datetime(2000, 1, 1, 0, 0).astimezone(vt_tz)

    for article in articles:
        episode = podcast.add_episode()
        episode.id = article.link
        episode.title = article.title
        episode.summary = article.description
        episode.link = article.link
        episode.publication_date = article.pub_date
        if article.pub_date > latest:
            latest = article.pub_date
        episode.media = Media(article.media, size=None, duration=None, type=article.type)

    # Stamp the feed with the newest article date found.
    podcast.last_updated = latest
    podcast.publication_date = latest
    return podcast
def rss(url_token):
    """Flask view: build an RSS feed from the Dropbox files behind url_token.

    :param url_token: token used to look up the Dropbox access credentials
    :return: XML Response with one episode per temporary Dropbox link
    """
    dropbox_access_token, title, description = get_the_latest_token_info(
        url_token)
    urls = get_temporary_link(dropbox_access_token)

    p = Podcast()
    p.name = title
    p.description = description
    p.website = "https://www.google.com"
    p.explicit = True

    # IDIOM FIX: the enumerate() index was never used; iterate directly.
    for size, url, uid, name in urls:
        my_episode = Episode()
        my_episode.title = os.path.splitext(name)[0]  # file name sans extension
        my_episode.id = uid
        my_episode.media = Media(url, size=size, type="audio/mpeg")
        p.episodes.append(my_episode)

    # str(p) renders the RSS XML.
    return Response(str(p), mimetype='text/xml')
def scrape_morning_edition(web_session=None, params=params):
    """Scrape NPR Morning Edition and return its RSS feed XML as a string.

    :param web_session: optional requests_html.HTMLSession; a fresh one is
        created per call when omitted.
    :param params: scraper configuration mapping.
    """
    # BUG FIX: the old default argument built one HTMLSession at import
    # time that was silently shared by every call (mutable default
    # argument anti-pattern); create the session lazily instead.
    if web_session is None:
        web_session = requests_html.HTMLSession()

    podcast = Podcast()
    podcast.name = "NPR Morning Edition"
    podcast.description = \
        """Every weekday for over three decades, Morning Edition has taken listeners around the country and the world with two hours of multi-faceted stories and commentaries that inform, challenge and occasionally amuse. Morning Edition is the most listened-to news radio program in the country."""
    podcast.website = "https://www.npr.org/programs/morning-edition"
    podcast.explicit = False

    scrape(web_session, params, 'morning-edition', podcast)

    rssfeed = podcast.rss_str(minimize=False)
    return rssfeed
def scrape_by_program(program, web_session=None, params=params):
    """Scrape one NPR program page and return its RSS feed XML as a string.

    :param program: program slug, e.g. 'morning-edition'
    :param web_session: optional requests_html.HTMLSession; a fresh one is
        created per call when omitted (the previous default argument shared
        a single session across all calls -- mutable default anti-pattern).
    :param params: scraper configuration mapping.
    :raises WebFormatException: when the program slug is unknown.
    """
    if web_session is None:
        web_session = requests_html.HTMLSession()

    # (name, description, image) per supported program slug; a dispatch
    # table replaces the old if/elif chain.
    known_programs = {
        'morning-edition': (
            "NPR Morning Edition",
            """Every weekday for over three decades, Morning Edition has taken listeners around the country and the world with two hours of multi-faceted stories and commentaries that inform, challenge and occasionally amuse. Morning Edition is the most listened-to news radio program in the country.""",
            'https://media.npr.org/assets/img/2018/08/06/npr_me_podcasttile_sq-4036eb96471eeed96c37dfba404bb48ea798e78c-s200-c85.jpg'),
        'all-things-considered': (
            "NPR All Things Considered",
            """NPR's afternoon news show""",
            'https://media.npr.org/assets/img/2018/08/06/npr_atc_podcasttile_sq-bcc33a301405d37aa6bdcc090f43d29264915f4a-s200-c85.jpg'),
        'weekend-edition-saturday': (
            "NPR Weekend Edition Saturday",
            """NPR morning news on Saturday""",
            'https://media.npr.org/assets/img/2019/02/26/we_otherentitiestemplatesat_sq-cbde87a2fa31b01047441e6f34d2769b0287bcd4-s200-c85.png'),
        'weekend-edition-sunday': (
            "NPR Weekend Edition Sunday",
            """NPR morning news show on Sunday""",
            'https://media.npr.org/assets/img/2019/02/26/we_otherentitiestemplatesun_sq-4a03b35e7e5adfa446aec374523a578d54dc9bf5-s200-c85.png'),
    }
    if program not in known_programs:
        raise WebFormatException(f"program { program } not found")

    podcast = Podcast()
    podcast.explicit = False
    podcast.website = params[PARAMS_BASEURL].format(program=program)
    podcast.name, podcast.description, podcast.image = known_programs[program]

    scrape(web_session, params, program, podcast)

    rssfeed = podcast.rss_str(minimize=False)
    return rssfeed
def genero_feed(puntateList):
    """Build the 'Pascal' (Rai Radio 2) RSS feed from scraped episode tuples.

    Each item of ``puntateList`` is indexed as: [0]=title, [1]=page link,
    [2]=date 'dd/mm/yyyy' (may be empty), [3]=media URL, [4]=media size.
    The feed is written to the module-level ``rssfile`` path.
    """
    if not puntateList:
        return
    # Creo un nuovo podcast
    p = Podcast()
    p.name = "Pascal Rai Radio 2"
    p.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    p.website = "http://www.raiplayradio.it/programmi/pascal/"
    p.explicit = True
    p.image = "https://rss.draghetti.it/pascal_image.jpg"
    p.feed_url = "https://rss.draghetti.it/pascal.xml"
    p.copyright = "Rai Radio 2"
    p.language = "it-IT"

    for puntata in puntateList:
        episode = Episode()
        # BUG FIX: str.encode() returns bytes on Python 3; strip non-ASCII
        # characters but keep the title a str.
        episode.title = puntata[0].encode("ascii", "ignore").decode("ascii")
        episode.link = puntata[1]
        # La dimensione del file e approssimativa
        episode.media = Media(puntata[3], puntata[4])
        if puntata[2]:
            # Date is 'dd/mm/yyyy'; publish at 20:00 UTC.
            parts = puntata[2].split("/")
            episode.publication_date = datetime.datetime(int(parts[2]), int(parts[1]), int(parts[0]),
                                                         20, 00, tzinfo=pytz.utc)
        else:
            episode.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())
        p.episodes.append(episode)

    # NOTE(review): rssfile is expected to be defined at module level -- confirm.
    p.rss_file(rssfile, minimize=False)
def setUp(self):
    # XML namespaces the assertions look up elements under.
    self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
    self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

    podcast = Podcast()
    self.title = 'Some Testfeed'
    self.link = 'http://lernfunk.de'
    self.description = 'A cool tent'
    self.explicit = False

    podcast.name = self.title
    podcast.website = self.link
    podcast.description = self.description
    podcast.explicit = self.explicit

    episode = podcast.add_episode()
    episode.id = 'http://lernfunk.de/media/654321/1'
    episode.title = 'The First Episode'
    self.fe = episode

    # Also exercise appending to the episodes list directly.
    episode = Episode()
    podcast.episodes.append(episode)
    episode.id = 'http://lernfunk.de/media/654321/1'
    episode.title = 'The Second Episode'

    episode = podcast.add_episode()
    episode.id = 'http://lernfunk.de/media/654321/1'
    episode.title = 'The Third Episode'

    self.fg = podcast

    # Emit every warning, but swallow the output during the tests.
    warnings.simplefilter("always")
    warnings.showwarning = lambda *args, **kwargs: None
def get_podcast(self):
    """Fetch a Ximalaya album's info and paged track list, then write its RSS file.

    Reads instance config: album_info_url, album_list_url, album_url,
    album_id, headers, episode_pre_page. Output goes to
    ./podcast/ximalaya/<album_id>.xml.
    """
    webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
    album_info = json.loads(webpage.decode('utf-8'))
    # ret == 200 signals a successful API response; otherwise do nothing.
    if album_info['ret'] == 200:
        album_info_data = album_info['data']
        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo']['richIntro']
        self.podcast.language = 'cn'
        # split('!') drops the image-resize suffix from the cover URL.
        self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True
        text = ''
        page_num = 1
        # Pages needed to cover all tracks, rounded up.
        # NOTE(review): the extra +1 looks like an off-by-one that requests
        # one empty page past the end -- confirm against the API behaviour.
        album_page_count = math.ceil(album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
        while page_num <= album_page_count:
            webpage = tools.get_url(self.album_list_url.format(self.album_id, page_num, self.episode_pre_page), self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                # get_episode presumably adds the Episode to self.podcast and
                # returns a media link; the accumulated `text` is currently
                # unused because the save_m4a call below is commented out.
                _, link = self.get_episode(episode_info['trackId'])
                text += link
            page_num += 1
        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)
        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), text)
        print("「{}」が上手に焼きました".format(self.album_id))
def generate_podcast(self, feed_name: str) -> str:
    """
    Create podcast XML based on the files found in podcastDir.
    Taken from https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html
    :param self: PodcastService class
    :param feed_name: name of the feed and the sub-directory for files
    :return: string of the podcast
    """
    def _relative(path: str) -> str:
        # BUG FIX: str.lstrip() removes a *character set*, not a prefix,
        # so filepath.lstrip(self.search_dir) mangled paths whose names
        # began with any character of search_dir. Strip the directory
        # prefix explicitly instead.
        prefix = self.search_dir
        return path[len(prefix):].lstrip('/') if path.startswith(prefix) else path

    # Initialize the feed
    p = Podcast()
    # Required fields
    p.name = f'{feed_name} Archive'
    p.description = 'Stuff to listen to later'
    p.website = self.base_url
    p.complete = False
    # Optional
    p.language = 'en-US'
    p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
    p.explicit = False
    p.authors.append(Person("Anthology"))

    for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
        filepath = str(path)
        episode = p.add_episode()

        # Attempt to load saved metadata
        metadata_file_name = filepath.replace('.mp3', '.json')
        try:
            with open(metadata_file_name) as metadata_file:
                metadata = json.load(metadata_file)
        except FileNotFoundError:
            metadata = {}
        except JSONDecodeError:
            metadata = {}
            self.logger.error(f'Failed to read {metadata_file_name}')

        # Build the episode based on either the saved metadata or the file
        # details. BUG FIX: .rstrip('.mp3') stripped any trailing '.', 'm',
        # 'p' or '3' characters from the title; slice off the extension.
        file_name = filepath.split('/')[-1]
        episode.title = metadata.get('title', file_name[:-len('.mp3')])
        episode.summary = metadata.get('summary', htmlencode('Some Summary'))
        if 'link' in metadata:
            episode.link = metadata.get('link')
        if 'authors' in metadata:
            episode.authors = [
                Person(author) for author in metadata.get('authors')
            ]
        episode.publication_date = \
            isoparse(metadata.get('publication_date')) if 'publication_date' in metadata \
            else datetime.fromtimestamp(os.path.getmtime(filepath), tz=pytz.utc)
        episode.media = Media(
            f'{p.website}/{_relative(filepath)}'.replace(' ', '+'),
            os.path.getsize(filepath))
        episode.media.populate_duration_from(filepath)

        if "image" in metadata:
            episode.image = metadata.get('image')
        else:
            # Fall back to a sibling .jpg/.png with the same stem.
            for ext in ['.jpg', '.png']:
                image_file_name = filepath.replace('.mp3', ext)
                if os.path.isfile(image_file_name):
                    episode.image = f'{p.website}/{_relative(image_file_name)}'.replace(' ', '+')
                    break

        # Save the metadata for future editing
        if not os.path.exists(metadata_file_name):
            metadata = {
                'title': episode.title,
                'summary': episode.summary,
                'publication_date': episode.publication_date,
                'authors': episode.authors
            }
            with open(metadata_file_name, 'w') as outFile:
                json.dump(metadata, outFile, indent=2, default=str)

    return p.rss_str()
def album(self):
    """Fetch a Ximalaya album via its JSON API (paged) and write the RSS feed.

    Reads instance config: album_info_url, album_list_url, album_url,
    detail_url, album_id, header, page_size. Output goes to
    ximalaya/<album_id>.rss.
    """
    album_info = requests.get(self.album_info_url.format(self.album_id),
                              headers=self.header).content
    album_info_content = json.loads(album_info.decode('utf-8'))
    # ret == 200 signals a successful API response; otherwise do nothing.
    if album_info_content['ret'] == 200:
        album_info_data = album_info_content['data']
        # Initialise the feed from the album metadata.
        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        # Fall back to the album title when there is no intro text.
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo'][
                'richIntro']
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # split('!') drops the image-resize suffix from the cover URL.
        self.podcast.image = 'https:' + album_info_data['mainInfo'][
            'cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')
        page_num = 1
        # Pages needed to cover all tracks, rounded up (legacy "py2 +1").
        # NOTE(review): the +1 looks like it requests one page past the
        # end -- confirm against the API behaviour.
        track_total_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] /
            self.page_size) + 1
        while page_num <= track_total_count:
            album_list = requests.get(self.album_list_url.format(
                self.album_id, page_num, self.page_size),
                                      headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            count = len(album_list_content['data']['tracksAudioPlay'])
            for each in album_list_content['data']['tracksAudioPlay']:
                # One extra detail request per track; failures are logged
                # and the track is skipped.
                try:
                    detail = requests.get(self.detail_url.format(
                        each['trackId']),
                                          headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))
                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])
                    image = each['trackCoverPath'].split('!')[0]
                    # Only png/jpg covers are usable as episode art;
                    # otherwise reuse the album cover.
                    if image[-4:] == '.png' or image[-4:] == '.jpg':
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image
                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace(
                            '\r\n', '')
                    else:
                        episode.summary = each['trackName']
                    episode.link = 'http://www.ximalaya.com%s' % each[
                        'albumUrl']
                    episode.authors = [
                        Person("forecho", '*****@*****.**')
                    ]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    episode.media = Media(each['src'], each['duration'])
                    # Newest tracks get the lowest position so feed order
                    # matches the site (count is per page).
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            # Next page of the track list.
            # print self.podcast.rss_str()
            page_num = page_num + 1
        # Write the feed to disk.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                              minimize=True)
def lambda_handler(event, context):
    """Build the podcast RSS feed from episode records stored in DynamoDB.

    Record with ``episode-num == 0`` holds the podcast-level metadata; every
    other record becomes an RSS item.  Episodes missing media-file info
    (file name, size, duration) get their MP3 downloaded from S3 and
    analyzed first, with the results written back to DynamoDB.  The finished
    feed is uploaded to S3 as ``ccc/podcast.rss``.

    :param event: Lambda event payload (unused).
    :param context: Lambda context object (unused).
    """
    print('Starting cccRssBuilder Lambda function')

    # Get episodes from DynamoDB; sorting puts "episode 0" (feed info) first.
    episodes = query_episodes()
    episodes.sort(key=lambda x: x['episode-num'])

    # Main podcast info comes from "episode 0".
    episodeInfo = episodes[0]
    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # One S3 client, reused for the media downloads and the final upload
    # (the original built a fresh client in two places).
    s3 = boto3.client('s3')

    for episode in episodes:
        if episode['episode-num'] == 0:
            continue  # metadata record, not a real episode

        # Backfill media info (file name, size, duration) when missing.
        if 'media-file' not in episode:
            if not _add_media_info(episode, s3):
                # Preserve original behavior: abort the whole build when the
                # downloaded file is not a parseable MP3.
                return

        mediaURL = ('https://www.kwksolutions.com/ccc/media/'
                    + episode['media-file'])
        h, mins, secs = _parse_duration(episode['duration'])
        year, month, day = (int(part)
                            for part in episode['pub-date'].split('-'))

        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        # NOTE(review): pytz.timezone('EST') is a fixed -05:00 offset (no
        # DST); kept as-is to preserve the published dates.
        e.publication_date = datetime.datetime(
            year, month, day, 12, 0, 0, tzinfo=pytz.timezone('EST'))
        e.media = Media(
            mediaURL, episode['size'],
            duration=datetime.timedelta(hours=h, minutes=mins, seconds=secs))

    # Write the feed locally, then upload with the proper MIME type.
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3.upload_file(rssLocalFile, 'kwksolutions.com', rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})
    return


def _parse_duration(durationStr):
    """Split a ``MM:SS`` or ``HH:MM:SS`` string into (hours, mins, secs) ints.

    Replaces the original try/except-as-control-flow for the optional
    hours field with explicit zero-padding.
    """
    parts = [int(part) for part in durationStr.split(':')]
    while len(parts) < 3:
        parts.insert(0, 0)  # pad missing hours (and minutes) with zero
    return parts[0], parts[1], parts[2]


def _format_duration(totalSeconds):
    """Render a second count as ``MM:SS``, or ``HH:MM:SS`` from one hour up."""
    hours, remainder = divmod(totalSeconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours == 0:
        return '{:02d}:{:02d}'.format(minutes, seconds)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)


def _add_media_info(episode, s3):
    """Download the episode MP3 from S3, measure size and duration, persist.

    Updates DynamoDB via ``update_episode`` and mutates *episode* in place
    with ``media-file``, ``size`` and ``duration``.

    :param episode: episode record dict from DynamoDB.
    :param s3: a boto3 S3 client.
    :returns: True on success, False when the file is not a valid MP3.
    """
    episodeNum = episode['episode-num']
    print('Analyzing media file for episode', episodeNum)
    mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum),
                                           episode['pub-date'])
    print('Media file:', mediaFile)
    localMediaFile = '/tmp/' + mediaFile
    s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile,
                     localMediaFile)
    # Analyze the MP3 for duration; narrowed from a bare except so that
    # SystemExit/KeyboardInterrupt are no longer swallowed.
    try:
        audio = MP3(localMediaFile)
    except Exception:
        print('Not an MP3 file!')
        return False
    durationStr = _format_duration(round(audio.info.length))
    size = str(os.path.getsize(localMediaFile))
    update_episode(episodeNum, mediaFile, size, durationStr)
    episode['media-file'] = mediaFile
    episode['size'] = size
    episode['duration'] = durationStr
    return True
content = requests.get(base_url).content soup = BeautifulSoup(content, features="lxml") urls_to_follow = [] for anchor in soup.select("#listProgramsContent a")[:10]: urls_to_follow.append(base_href + anchor.get("href")) p = Podcast( name="Alta Tensão", description="Alta Tensão com António Freitas", image="https://cdn-images.rtp.pt/EPG/radio/imagens/1068_10159_53970.jpg", website=base_url, explicit=True, ) episodes = [] for url in urls_to_follow: content = requests.get(url).content soup = BeautifulSoup(content, features="lxml") res = re.search(b'file : "(.+?)",\\n', content) title = soup.select("b.vod-title")[0].text date = soup.select(".vod-data p span.episode-date")[0].text media_url = res.groups()[0].decode() head = requests.head(url) if '\n' in title:
if next((x for x in session_items if x['CID'] == cid), None): print(f'WARNING: duplicate CID {cid} for new item: {title}') # write the new sessions json file updated_session_items = new_items + session_items for item in updated_session_items: item['link'] = f'{ipfs_prefix}{item["CID"]}{ipfs_suffix}' with open(sessions_filename, 'w') as outfile: json.dump(updated_session_items, outfile, indent=2) print('>>> wrote fresh sessions.json file') # write the new rss file p = Podcast() p.name = "The Objectivism Seminar" p.category = Category("Society & Culture", "Philosophy") p.language = "en-US" p.explicit = True p.description = ( "A weekly online conference call to systematically study " + "the philosophy of Objectivism via the works of prominent Rand scholars.") p.website = "https://www.ObjectivismSeminar.com" p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg" p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss" p.authors = [Person("Greg Perkins, Host", "*****@*****.**")] p.owner = Person("Greg Perkins", "*****@*****.**") p.episodes += [
def setUp(self):
    """Create a fully populated ``Podcast`` plus the matching expected values.

    Every channel-level attribute the test class asserts on is first stored
    on ``self`` (the expected value) and then applied to the ``Podcast``
    instance, which is exposed as ``self.fg``.  Also pins the locale to 'C'
    so date formatting is deterministic, and silences warnings.
    """
    # Remember the current locale and force 'C' for reproducible output.
    self.existing_locale = locale.setlocale(locale.LC_ALL, None)
    locale.setlocale(locale.LC_ALL, 'C')

    fg = Podcast()

    # XML namespaces used when inspecting the generated feed.
    self.nsContent = "http://purl.org/rss/1.0/modules/content/"
    self.nsDc = "http://purl.org/dc/elements/1.1/"
    self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
    self.feed_url = "http://example.com/feeds/myfeed.rss"

    self.name = 'Some Testfeed'
    # Use character not in ASCII to catch encoding errors
    self.author = Person('Jon Døll', '*****@*****.**')
    self.website = 'http://example.com'
    self.description = 'This is a cool feed!'
    self.subtitle = 'Coolest of all'
    self.language = 'en'

    # <cloud> element components (rssCloud lightweight pub/sub).
    self.cloudDomain = 'example.com'
    self.cloudPort = '4711'
    self.cloudPath = '/ws/example'
    self.cloudRegisterProcedure = 'registerProcedure'
    self.cloudProtocol = 'SOAP 1.1'

    self.pubsubhubbub = "http://pubsubhubbub.example.com/"
    self.contributor = {
        'name': "Contributor Name",
        'email': 'Contributor email'
    }
    self.copyright = "The copyright notice"
    self.docs = 'http://www.rssboard.org/rss-specification'
    self.skip_days = set(['Tuesday'])
    self.skip_hours = set([23])
    self.explicit = False
    self.programname = podgen.version.name
    self.web_master = Person(email='*****@*****.**')
    self.image = "http://example.com/static/podcast.png"
    self.owner = self.author
    self.complete = True
    self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
    self.xslt = "http://example.com/feed/stylesheet.xsl"

    # Mirror the expected values onto the Podcast under test.
    fg.name = self.name
    fg.website = self.website
    fg.description = self.description
    fg.subtitle = self.subtitle
    fg.language = self.language
    fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                self.cloudRegisterProcedure, self.cloudProtocol)
    fg.pubsubhubbub = self.pubsubhubbub
    fg.copyright = self.copyright
    fg.authors.append(self.author)
    fg.skip_days = self.skip_days
    fg.skip_hours = self.skip_hours
    fg.web_master = self.web_master
    fg.feed_url = self.feed_url
    fg.explicit = self.explicit
    fg.image = self.image
    fg.owner = self.owner
    fg.complete = self.complete
    fg.new_feed_url = self.new_feed_url
    fg.xslt = self.xslt
    self.fg = fg

    # Surface every warning, but discard the output so tests stay quiet.
    warnings.simplefilter("always")

    def noop(*args, **kwargs):
        pass
    warnings.showwarning = noop