Пример #1
0
def genero_feed(episodesList):
    """Build the "NECST Tech Time" podcast feed and write it to ``rssfile``.

    Nothing is generated when ``episodesList`` is empty (or otherwise falsy).

    :param episodesList: sequence of indexable episode records. Index 1 is
        used as the title, index 2 as the link, index 3 is handed to
        ``Media`` as its first argument and index 4 is used as the
        publication date.
    :return: None.
    """
    if not episodesList:
        return

    feed = Podcast()
    feed.name = "NECST Tech Time"
    feed.description = "The NECSTLab (Novel, Emerging Computing System Technologies Laboratory) is a laboratory inside DEIB department of Politecnico di Milano, where there are a number of different research lines on advanced topics in computing systems: from architectural characteristics, to hardware-software codesign methodologies, to security and dependability issues of complex system architectures (scaling from mobile devices to large virtualized datacenters)."
    feed.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/necst_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    feed.copyright = "Poli Radio"
    feed.language = "it-IT"

    for record in episodesList:
        item = Episode()

        # Non-ASCII characters are dropped from the title and the link.
        # NOTE(review): on Python 3 encode() yields bytes, not str - confirm
        # which interpreter this script targets.
        item.title = record[1].encode("ascii", "ignore")
        item.link = record[2].encode("ascii", "ignore")

        # The size is a fixed estimate derived from previously analysed
        # episodes, not the real size of this file.
        item.media = Media(record[3], 30000000, type="audio/x-m4a", duration=None)
        item.publication_date = record[4]

        feed.episodes.append(item)

    # ``rssfile`` is not defined in this function; it is resolved from the
    # enclosing (module) scope and names the output file.
    feed.rss_file(rssfile, minimize=False)
Пример #2
0
def genero_feed(episodesList):
    """Write the unofficial "NECST Tech Time" feed to ``rssfile``.

    The function does nothing when ``episodesList`` is falsy.

    :param episodesList: sequence of indexable episode records; indexes 1-4
        supply the title, link, ``Media`` first argument and publication
        date respectively.
    :return: None.
    """
    if not episodesList:
        return

    # Channel-level settings, applied in this exact order.
    channel_settings = (
        ("name", "NECST Tech Time"),
        ("description", "Feed Podcast non ufficiale di NECST Tech Time - Powered By Andrea Draghetti"),
        ("website", "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"),
        ("explicit", True),
        ("image", "https://rss.draghetti.it/necst_image.jpg"),
        ("feed_url", "https://rss.draghetti.it/necstpodcast.xml"),
        ("copyright", "Poli Radio"),
        ("language", "it-IT"),
    )

    feed = Podcast()
    for attribute, value in channel_settings:
        setattr(feed, attribute, value)

    for record in episodesList:
        item = Episode()

        # Characters outside ASCII are silently discarded.
        item.title = record[1].encode("ascii", "ignore")
        item.link = record[2].encode("ascii", "ignore")

        # The size is a statistical estimate based on analysed episodes.
        item.media = Media(record[3], 30000000, type="audio/x-m4a", duration=None)
        item.publication_date = record[4]

        feed.episodes.append(item)

    # The output path comes from ``rssfile`` in the enclosing scope.
    feed.rss_file(rssfile, minimize=False)
Пример #3
0
def main():
    """Print an RSS feed built from the episodes listed in ``thebugle.json``.

    Each JSON entry is read for its ``id``, ``title``, ``file`` and ``date``
    keys; ``date`` is split on ``-`` into year, month and day. The media
    object for every episode is created through podgen's
    ``Media.create_from_server_response`` followed by ``fetch_duration``.
    """
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm that matches how thebugle.json was written.
    with open('thebugle.json') as source:
        episodes = json.load(source)

    feed = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for episode in episodes:
        headline = f"{episode['id']}: {episode['title']}"
        entry = feed.add_episode(Episode(title=headline))

        media_url = f"{MEDIA_BASE_URL}/{episode['file']}"
        entry.media = Media.create_from_server_response(media_url)
        entry.media.fetch_duration()

        # Publication time is midnight UTC on the listed date.
        fields = episode['date'].split('-')
        entry.publication_date = datetime(
            int(fields[0]), int(fields[1]), int(fields[2]), tzinfo=pytz.utc)

    print(feed.rss_str())
Пример #4
0
    def album(self):
        """Fetch album data from the two album APIs and write the RSS file.

        Reads ``self.album_info_api``, ``self.album_list_api``, ``self.url``
        and ``self.album_id``; stores the generated feed on ``self.podcast``
        and writes it to ``qingting/<album_id>.rss``.
        """
        info = json.loads(requests.get(self.album_info_api).content)
        listing = json.loads(requests.get(self.album_list_api).content)

        feed = self.podcast = Podcast()
        album = info['data']
        feed.name = album['title']
        feed.authors.append(Person("Powered by maijver", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = album['description']
        # NOTE(review): 'cn' is a country code; the ISO 639 language code for
        # Chinese is 'zh' - confirm what feed consumers expect here.
        feed.language = 'cn'
        # The thumbnail URL with its '!200' marker removed; reused for every episode.
        cover = album['thumbs']['small_thumb'].replace('!200', '')
        feed.image = cover
        feed.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("maijver", '*****@*****.**')

        for track in listing['data']:
            entry = self.podcast.add_episode()
            entry.id = str(track['id'])
            entry.title = track['title']
            # Progress output: "<podcast name>=====<episode title>".
            print(self.podcast.name + '=====' + track['title'])
            entry.image = cover
            entry.summary = track['title']
            entry.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, track['id'])
            entry.authors = [Person("forecho", '*****@*****.**')]
            entry.publication_date = self.reduction_time(track['update_time'])
            # Only the first entry of 'bitrates_url' is used as the audio source.
            audio_path = track['mediainfo']['bitrates_url'][0]['file_path']
            entry.media = Media("http://od.qingting.fm/{}".format(audio_path),
                                track['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)
Пример #5
0
    def album(self):
        """Scrape the album page at ``self.url`` and write its RSS file.

        The feed is stored on ``self.podcast``; each sound id found on the
        page is passed, together with its date text, to ``self.detail``.
        The result is written to ``ximalaya/<album_id>.rss``.
        """
        response = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(response.content, "lxml")

        # Initialise the feed-level metadata.
        feed = self.podcast = Podcast()
        feed.name = soup.find('div', 'detailContent_title').get_text()
        feed.authors.append(Person("Powered by forecho", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = soup.find('div', 'mid_intro').get_text()
        feed.language = 'cn'
        # Keep only the part of the cover URL before the first '!'.
        cover_src = soup.find('a', 'albumface180').find('img').get('src')
        feed.image = cover_src.split('!')[0]
        feed.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("forecho", '*****@*****.**')

        # The 'sound_ids' attribute holds a comma-separated list of ids.
        container = soup.find('div', class_='personal_body')
        sound_ids = container.get('sound_ids').split(',')

        for sound_id in sound_ids:
            row = soup.find('li', sound_id=sound_id)
            date = row.find('div', class_='operate').get_text().strip()
            self.detail(sound_id, date)

        # Generate the file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
Пример #6
0
def generate_podcast_xml(base, books):
    """Write ``skeleton/rss.xml`` with one episode per entry in ``books``.

    :param base: prefix of the public media location; ``/files/<name>`` is
        appended to it for every book.
    :param books: iterable of file names. The size of each one is read from
        the local path ``files/<name>``.
    :return: None.
    """
    from podgen import Episode, Media, Podcast

    feed = Podcast()
    feed.name = "AeonNeo's Audiobooks"
    feed.description = "Description"
    feed.website = "www.yangvincent.com"
    feed.explicit = False

    for book_name in books:
        item = Episode()
        # NOTE(review): dropping the last four characters assumes a
        # three-letter extension such as ".mp3" - confirm for other names.
        item.title = book_name[:-4]

        public_location = base + '/files/' + book_name
        local_path = 'files/' + book_name
        try:
            byte_count = os.path.getsize(local_path)
        except OSError as error:
            # Best effort: report the problem and publish a size of 0.
            print(error)
            byte_count = 0

        item.media = Media(public_location, type='audio/mp4a', size=byte_count)
        feed.episodes.append(item)

    feed.rss_file('skeleton/rss.xml', minimize=True)
Пример #7
0
 def test_constructor(self):
     """Build the Podcast purely through constructor keywords and re-check it.

     ``self.fg`` is replaced by the new instance, then ``test_baseFeed`` is
     run against it to verify that the fields were set.
     """
     constructor_arguments = dict(
         name=self.name,
         website=self.website,
         description=self.description,
         subtitle=self.subtitle,
         language=self.language,
         cloud=(self.cloudDomain, self.cloudPort, self.cloudPath,
                self.cloudRegisterProcedure, self.cloudProtocol),
         pubsubhubbub=self.pubsubhubbub,
         copyright=self.copyright,
         authors=[self.author],
         skip_days=self.skip_days,
         skip_hours=self.skip_hours,
         web_master=self.web_master,
         feed_url=self.feed_url,
         explicit=self.explicit,
         image=self.image,
         owner=self.owner,
         complete=self.complete,
         new_feed_url=self.new_feed_url,
         xslt=self.xslt,
     )
     # Replace the instance that was assigned to self.fg beforehand.
     self.fg = Podcast(**constructor_arguments)
     self.test_baseFeed()
def genero_feed(puntateList):
    """Build the "Il Ruggito del Coniglio" feed and write it to ``rssfile``.

    Nothing happens when ``puntateList`` is falsy.

    :param puntateList: sequence of indexable episode records: index 0 is
        the title, 1 the link, 2 a ``day/month/year`` string (may be empty),
        and 3 and 4 are passed to ``Media`` as its first two arguments.
    :return: None.
    """
    if not puntateList:
        return

    feed = Podcast()
    feed.name = "Il Ruggito del Coniglio"
    feed.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
    feed.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for puntata in puntateList:
        item = Episode()

        # Non-ASCII characters are dropped from the title.
        item.title = puntata[0].encode("ascii", "ignore")
        item.link = puntata[1]

        # The file size is approximate.
        item.media = Media(puntata[3], puntata[4])

        raw_date = puntata[2]
        if raw_date:
            # "day/month/year" -> 10:00 UTC on that date.
            pieces = raw_date.split("/")
            item.publication_date = datetime.datetime(
                int(pieces[2]), int(pieces[1]), int(pieces[0]),
                10, 0, tzinfo=pytz.utc)
        else:
            # No date available: fall back to the current UTC time.
            item.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

        feed.episodes.append(item)

    # ``rssfile`` is resolved from the enclosing scope and names the output file.
    feed.rss_file(rssfile, minimize=False)
Пример #9
0
    def test_mandatoryValues(self):
        """Omitting any one mandatory property must make ``_create_rss`` raise.

        A fresh Podcast is created per mandatory property with every other
        mandatory property set, and ``ValueError`` is expected each time. A
        failing case is re-raised with the name of the omitted property.
        """
        # (label used in the failure message, attribute name shared by the
        # test case and the Podcast)
        mandatory = (
            ("description", "description"),
            ("title", "name"),
            ("link", "website"),
            ("explicit", "explicit"),
        )

        for test_property, _ in mandatory:
            fg = Podcast()
            for label, attribute in mandatory:
                if label != test_property:
                    setattr(fg, attribute, getattr(self, attribute))
            try:
                self.assertRaises(ValueError, fg._create_rss)
            except AssertionError as failure:
                raise_from(
                    AssertionError("The test failed for %s" % test_property),
                    failure)
Пример #10
0
def genero_feed(episodesList):
    """Write the unofficial "All You Can Dance" feed to ``rssfile``.

    The function returns without doing anything when ``episodesList`` is
    falsy.

    :param episodesList: sequence of indexable episode records; index 1 is
        the title, 2 the link, 3 the first ``Media`` argument and 4 the
        publication date.
    :return: None.
    """
    if not episodesList:
        return

    podcast = Podcast()
    podcast.name = "All You Can Dance by Dino Brawn"
    podcast.description = "Feed Podcast non ufficiale di All You Can Dance by Dino Brown - Powered By Andrea Draghetti"
    podcast.website = "https://onedance.fm/"
    podcast.explicit = True
    podcast.image = "https://rss.draghetti.it/allyoucandance_image.jpg"
    podcast.feed_url = "https://rss.draghetti.it/allyoucandance.xml"
    podcast.copyright = "One Dance"
    podcast.language = "it-IT"

    for details in episodesList:
        entry = Episode()

        # Anything outside ASCII is stripped from both fields.
        entry.title = details[1].encode("ascii", "ignore")
        entry.link = details[2].encode("ascii", "ignore")

        # 30000000 is an estimated size based on analysed episodes.
        entry.media = Media(details[3], 30000000, type="audio/x-m4a", duration=None)
        entry.publication_date = details[4]

        podcast.episodes.append(entry)

    # Output goes to the file named by ``rssfile`` from the enclosing scope.
    podcast.rss_file(rssfile, minimize=False)
Пример #11
0
def main():
    """Create an example podcast and print it or save it to a file.

    Exactly one command-line argument is expected. The literal ``rss``
    prints the feed through ``print_enc``; any other argument ending in
    ``rss`` is used as the output file name. Any other invocation prints a
    usage message and exits.

    ``sys``, ``datetime``, ``pytz`` and ``print_enc`` are not imported here
    and are taken from the enclosing module scope.
    """
    # There must be exactly one argument, and it must end with rss
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss')):
        # Invalid usage, print help message
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc ('Usage: %s ( <file>.rss | rss )' % \
                'python -m podgen')
        print_enc ('')
        print_enc ('  rss              -- Generate RSS test output and print it to stdout.')
        print_enc ('  <file>.rss       -- Generate RSS test teed and write it to file.rss.')
        print_enc ('')
        exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    # Imported only after the arguments have been validated.
    from podgen import Podcast, Person, Media, Category, htmlencode
    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    # Add a single example episode.
    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    # The summary text contains a literal '<', so it is passed through htmlencode.
    e1.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
            aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
            mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
            domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
            occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
            verba <3.''')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    e1.media = Media("http://example.com/episodes/loremipsum.mp3", 454599964,
                     duration=
                     datetime.timedelta(hours=1, minutes=32, seconds=19))

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    elif arg.endswith('rss'):
        # Write to file
        p.rss_file(arg, minimize=True)
Пример #12
0
def generate_podcast_xml(podcasts):
    """Return the RSS document for the configured podcast.

    The feed metadata comes from the ``config`` module and the feed is
    created with ``withhold_from_itunes=True``.

    :param podcasts: the episodes to publish; assigned as-is to the
        podcast's ``episodes``.
    :return: the value of ``Podcast.rss_str()``.
    """
    feed_settings = dict(
        name=config.PODCAST_NAME,
        description=config.PODCAST_DESCRIPTION,
        website=config.PODCAST_WEBSITE,
        explicit=config.PODCAST_CONTAINS_EXPLICIT_CONTENT,
        withhold_from_itunes=True,
    )
    feed = Podcast(**feed_settings)
    feed.episodes = podcasts
    return feed.rss_str()
Пример #13
0
    def album(self):
        """Scrape the album page, fetch every track and write the RSS file.

        Feed-level metadata is read from the HTML at ``self.album_url``; the
        track list comes from ``self.album_list_api`` and one extra JSON
        request is made per track. The feed is stored on ``self.podcast``
        and written to ``ximalaya/<album_id>.rss``. A failure while handling
        one track is printed and does not stop the remaining tracks.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the feed-level metadata
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        # Fall back to the podcast name when the page has no (or an empty) intro.
        if soup.find('div', 'album-intro') and soup.find('div', 'album-intro').get_text():
            self.podcast.description = soup.find('div', 'album-intro').get_text()
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # Keep only the part of the image URL before the first '!'.
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        count = len(album_list_data['data']['tracksAudioPlay'])
        for each in album_list_data['data']['tracksAudioPlay']:
            try:
                # Per-track details (intro text and creation time) come from a second endpoint.
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                # The episode is attached to the podcast here, so an exception
                # raised further down leaves a partially filled episode in the feed.
                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                # .gif and .bmp covers are replaced by the podcast-level image.
                if (image[-4:] == '.gif' or image[-4:] == '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                # Use the track name as the summary when there is no intro text.
                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                # Inverts the index: the track whose index equals count gets position 1.
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the error and the track URL, then move on to the next track.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()
            
        # Generate the file
        # print self.podcast.rss_str()
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
Пример #14
0
    def test_removeEntryByIndex(self):
        """Popping index 0 removes the only episode of a podcast."""
        podcast = Podcast()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        only_episode = podcast.add_episode()
        only_episode.id = 'http://lernfunk.de/media/654321/1'
        only_episode.title = 'The Third BaseEpisode'

        # One episode before the removal, none afterwards.
        assert len(podcast.episodes) == 1
        podcast.episodes.pop(0)
        assert len(podcast.episodes) == 0
Пример #15
0
def rssfeed(request, programid):
    """Build the RSS feed for the program identified by its id (int).

    1. Selects the program's non-deleted episodes from the 'digas' database,
       newest ``createdate`` first.
    2. Reads the program information from the app database.
    3. Lets podgen generate the XML.

    :param request: the incoming request (not used by this view).
    :param programid: program id; converted with ``int()``.
    :return: ``HttpResponse`` with content type ``application/xml``.
    """
    program_pk = int(programid)

    # Only these columns are loaded for each episode row.
    loaded_fields = ('program', 'title', 'remark', 'author',
                     'createdate', 'broadcastdate', 'filename',
                     'filesize', 'duration', 'softdel')
    rows = (DigasPodcast.objects.using('digas')
            .filter(softdel=0, program=program_pk)
            .only(*loaded_fields)
            .order_by('-createdate'))
    programinfo = ProgramInfo.objects.get(programid=program_pk)

    # globalsettings is imported here rather than at module level so that
    # Django does not fail on a missing constance_config table when starting
    # from scratch or in a new environment.
    from .models import globalsettings

    feed = Podcast(
        name=programinfo.name,
        subtitle=programinfo.subtitle,
        description=programinfo.description,
        website=feed_url(programid),  # rather than programinfo.website
        explicit=programinfo.explicit,
        category=Category(programinfo.category),
        authors=[globalsettings.owner],
        language=programinfo.language,
        owner=globalsettings.owner,
        feed_url=feed_url(programid),
        new_feed_url=feed_url(programid),
        image=programinfo.image_url,
    )

    for row in rows:
        # The publication date is derived from createdate or broadcastdate.
        published = digas2pubdate(row.createdate, row.broadcastdate)
        item = Episode(
            title=row.title,
            media=Media(mp3url(row.filename), row.filesize),
            link=mp3url(row.filename),  # multifeedreader uses this.
            id=guid(row.filename),
            summary=row.remark,
            publication_date=published)
        feed.episodes.append(item)

    # Send it as unicode.
    body = u'%s' % feed
    return HttpResponse(body, content_type='application/xml')
Пример #16
0
def create_podcast(name, desc, website):
    """Create a ``Podcast`` pre-filled with the fixed channel settings.

    :param name: podcast name.
    :param desc: podcast description.
    :param website: podcast website URL.
    :return: the configured ``Podcast`` (no episodes are added here).
    """
    p = Podcast()
    p.name = name
    p.description = desc
    p.authors = [Person("Dawn News", "*****@*****.**")]
    p.website = website
    p.image = "http://3.15.38.214/zarahatkay/cover_art.png"
    p.language = "en-US"
    p.feed_url = "http://3.15.38.214/zarahatkay"
    # The category name contains a plain ampersand. The previous value used
    # the HTML entity "&amp;", which is not part of the category name.
    p.category = Category("News & Politics")
    p.explicit = False
    return p
Пример #17
0
def main(event, context):
    """Return the "Sem Servidor" RSS feed built from the DynamoDB table.

    Presumably an AWS Lambda handler behind API Gateway (the signature and
    the shape of the returned dict suggest so - confirm against the
    deployment configuration).

    :param event: invocation event (not used).
    :param context: invocation context (not used).
    :return: dict with ``statusCode`` 200, an ``application/xml`` content
        type header and the RSS document as ``body``.
    """
    dynamodb = boto3.resource('dynamodb', region_name='sa-east-1')
    table = dynamodb.Table('semservidor-dev')

    # One scan() call returns a single page of results. Keep requesting pages
    # while the response carries a LastEvaluatedKey so no episode is dropped.
    page = table.scan()
    items = list(page['Items'])
    while 'LastEvaluatedKey' in page:
        page = table.scan(ExclusiveStartKey=page['LastEvaluatedKey'])
        items.extend(page['Items'])

    author = Person("Evandro Pires da Silva", "*****@*****.**")
    p = Podcast(
        name="Sem Servidor",
        description=
        "Podcast dedicado a arquitetura serverless, com conteúdo de qualidade em português.",
        website="https://semservidor.com.br",
        explicit=False,
        copyright="2020 Evandro Pires da Silva",
        # Brazilian Portuguese: the ISO 639-1 code for Portuguese is "pt".
        language="pt-BR",
        authors=[author],
        feed_url=
        "https://3tz8r90j0d.execute-api.sa-east-1.amazonaws.com/dev/podcasts/rss",
        category=Category("Music", "Music History"),
        owner=author,
        image="http://d30gvsirhz3ono.cloudfront.net/logo_semservidor_teste.jpg",
        web_master=Person(None, "*****@*****.**"))

    base_url = "http://d30gvsirhz3ono.cloudfront.net/"
    for item in items:
        info = item['info']
        file_path = base_url + info['arquivo']['nome']
        p.episodes.append(
            Episode(title=info['episodio'],
                    media=Media(file_path, int(info['arquivo']['tamanho'])),
                    summary=info['descricao'],
                    position=int(item['id'])))

    # Order the episodes according to the positions assigned above.
    p.apply_episode_order()
    rss = p.rss_str()

    response = {
        "statusCode": 200,
        "headers": {
            "content-type": "application/xml"
        },
        "body": rss
    }

    return response
Пример #18
0
def generate_rss_from_articles(feed_settings, articles):
    """
    Build a podgen ``Podcast`` from feed settings and a set of articles.

    :param feed_settings: object providing ``title``, ``author`` (a mapping
        with ``'name'`` and ``'email'`` keys), ``source_page_url``,
        ``copyright``, ``subtitle``, ``output_url`` and ``img_url``.
    :param articles: iterable of objects providing ``link``, ``title``,
        ``description``, ``pub_date``, ``media`` and ``type``. ``pub_date``
        is compared with a timezone-aware datetime, so it has to be
        timezone-aware as well.
    :return: the populated ``Podcast``. Its ``last_updated`` and
        ``publication_date`` are the newest article ``pub_date``, or
        2000-01-01 00:00 in Asia/Ho_Chi_Minh when no article is newer.
    """
    # Initialize the feed
    podcast = Podcast()
    podcast.name = feed_settings.title
    author = Person(feed_settings.author['name'], feed_settings.author['email'])
    podcast.authors.append(author)
    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    podcast.description = feed_settings.subtitle
    podcast.summary = feed_settings.subtitle
    podcast.subtitle = feed_settings.subtitle
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False
    podcast.owner = author

    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    # Lower bound for the "newest publication date" search below.
    # localize() attaches the zone to the naive value directly. Calling
    # astimezone() on a naive datetime would interpret it in the host's
    # local timezone (and is rejected by older Python versions).
    pastdate = vt_tz.localize(datetime.datetime(2000, 1, 1, 0, 0))

    for article in articles:
        episode = podcast.add_episode()
        episode.id = article.link
        episode.title = article.title
        episode.summary = article.description
        episode.link = article.link
        episode.publication_date = article.pub_date
        pastdate = max(pastdate, article.pub_date)
        # Size and duration are not known here, so they are passed as None.
        episode.media = Media(article.media, size=None, duration=None, type=article.type)

    podcast.last_updated = pastdate
    podcast.publication_date = pastdate

    return podcast
Пример #19
0
def rss(url_token):
    """Return an RSS response for the files linked to ``url_token``.

    The token is resolved to an access token, a title and a description by
    ``get_the_latest_token_info``; ``get_temporary_link`` then supplies one
    ``(size, url, uid, name)`` tuple per episode.

    :param url_token: token identifying the feed.
    :return: ``Response`` with mimetype ``text/xml`` containing ``str(podcast)``.
    """
    token_info = get_the_latest_token_info(url_token)
    dropbox_access_token, title, description = token_info
    links = get_temporary_link(dropbox_access_token)

    feed = Podcast()
    feed.name = title
    feed.description = description
    feed.website = "https://www.google.com"
    feed.explicit = True

    for size, url, uid, name in links:
        item = Episode()
        # The episode title is the file name without its extension.
        item.title = os.path.splitext(name)[0]
        item.id = uid
        item.media = Media(url, size=size, type="audio/mpeg")
        feed.episodes.append(item)

    return Response(str(feed), mimetype='text/xml')
Пример #20
0
def scrape_morning_edition(
        web_session=requests_html.HTMLSession(), params=params):
    """Return the RSS text for NPR Morning Edition.

    A podcast with fixed name, description and website is created, filled by
    ``scrape`` for the ``'morning-edition'`` program and rendered without
    minimising.

    NOTE(review): both defaults are evaluated once, when the function is
    defined, so calls that omit ``web_session`` share one session object.

    :param web_session: session passed through to ``scrape``.
    :param params: parameters passed through to ``scrape``.
    :return: the value of ``Podcast.rss_str(minimize=False)``.
    """
    feed = Podcast()
    feed.name = "NPR Morning Edition"
    feed.description = (
        """Every weekday for over three decades, Morning Edition has taken
        listeners around the country and the world with two hours of multi-faceted
        stories and commentaries that inform, challenge and occasionally amuse.
        Morning Edition is the most listened-to news radio program in the country.""")
    feed.website = "https://www.npr.org/programs/morning-edition"
    feed.explicit = False

    scrape(web_session, params, 'morning-edition', feed)

    return feed.rss_str(minimize=False)
Пример #21
0
def scrape_by_program(program,
                      web_session=requests_html.HTMLSession(),
                      params=params):
    """Return the RSS text for one of the supported NPR programs.

    :param program: program slug; one of ``'morning-edition'``,
        ``'all-things-considered'``, ``'weekend-edition-saturday'`` or
        ``'weekend-edition-sunday'``.
    :param web_session: session passed through to ``scrape``.
    :param params: mapping whose ``PARAMS_BASEURL`` entry is a format string
        taking ``program``; also passed through to ``scrape``.
    :return: the value of ``Podcast.rss_str(minimize=False)``.
    :raises WebFormatException: when ``program`` is not a supported slug.
    """
    # (slug, podcast name, description, image URL) per supported program.
    known_programs = (
        ('morning-edition',
         "NPR Morning Edition",
         """Every weekday for over three decades, Morning Edition has taken
            listeners around the country and the world with two hours of multi-faceted
            stories and commentaries that inform, challenge and occasionally amuse.
            Morning Edition is the most listened-to news radio program in the country.""",
         'https://media.npr.org/assets/img/2018/08/06/npr_me_podcasttile_sq-4036eb96471eeed96c37dfba404bb48ea798e78c-s200-c85.jpg'),
        ('all-things-considered',
         "NPR All Things Considered",
         """NPR's afternoon news show""",
         'https://media.npr.org/assets/img/2018/08/06/npr_atc_podcasttile_sq-bcc33a301405d37aa6bdcc090f43d29264915f4a-s200-c85.jpg'),
        ('weekend-edition-saturday',
         "NPR Weekend Edition Saturday",
         """NPR morning news on Saturday""",
         'https://media.npr.org/assets/img/2019/02/26/we_otherentitiestemplatesat_sq-cbde87a2fa31b01047441e6f34d2769b0287bcd4-s200-c85.png'),
        ('weekend-edition-sunday',
         "NPR Weekend Edition Sunday",
         """NPR morning news show on Sunday""",
         'https://media.npr.org/assets/img/2019/02/26/we_otherentitiestemplatesun_sq-4a03b35e7e5adfa446aec374523a578d54dc9bf5-s200-c85.png'),
    )

    feed = Podcast()
    feed.explicit = False
    feed.website = params[PARAMS_BASEURL].format(program=program)

    for slug, title, blurb, artwork in known_programs:
        if program == slug:
            feed.name = title
            feed.description = blurb
            feed.image = artwork
            break
    else:
        # No table entry matched the requested program.
        raise WebFormatException(f"program { program } not found")

    scrape(web_session, params, program, feed)

    return feed.rss_str(minimize=False)
Пример #22
0
def genero_feed(puntateList):
    """Build the "Pascal Rai Radio 2" feed and write it to ``rssfile``.

    Every entry of ``puntateList`` is read by position: ``[0]`` is the title,
    ``[1]`` the episode link, ``[2]`` a ``day/month/year`` string (may be
    empty), and ``[3]``/``[4]`` are handed to ``Media`` as its first and
    second arguments.  Nothing happens when the list is empty.
    """
    if not puntateList:
        return

    # Feed-level metadata
    feed = Podcast()
    feed.name = "Pascal Rai Radio 2"
    feed.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    feed.website = "http://www.raiplayradio.it/programmi/pascal/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/pascal_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/pascal.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for entry in puntateList:
        item = Episode()
        item.title = entry[0].encode("ascii", "ignore")
        item.link = entry[1]

        # The file size supplied here is only approximate
        item.media = Media(entry[3], entry[4])

        raw_date = entry[2]
        if not raw_date:
            # No date available: fall back to the current UTC time
            item.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())
        else:
            # Dates arrive as day/month/year; publish at 20:00 UTC
            fields = raw_date.split("/")
            item.publication_date = datetime.datetime(
                int(fields[2]),
                int(fields[1]),
                int(fields[0]),
                20,
                0,
                tzinfo=pytz.utc)

        feed.episodes.append(item)

    # Write the finished feed to the path held in ``rssfile``
    feed.rss_file(rssfile, minimize=False)
Пример #23
0
    def setUp(self):
        """Create a podcast fixture with three episodes and mute warning output."""
        self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
        self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

        # Expected feed-level values, kept on the test case for later asserts
        self.title = 'Some Testfeed'
        self.link = 'http://lernfunk.de'
        self.description = 'A cool tent'
        self.explicit = False

        podcast = Podcast()
        podcast.name = self.title
        podcast.website = self.link
        podcast.description = self.description
        podcast.explicit = self.explicit

        # First episode: created through the podcast's factory method
        first = podcast.add_episode()
        first.id = 'http://lernfunk.de/media/654321/1'
        first.title = 'The First Episode'
        self.fe = first

        # Second episode: built standalone and appended to the list directly
        second = Episode()
        podcast.episodes.append(second)
        second.id = 'http://lernfunk.de/media/654321/1'
        second.title = 'The Second Episode'

        # Third episode: factory method again
        third = podcast.add_episode()
        third.id = 'http://lernfunk.de/media/654321/1'
        third.title = 'The Third Episode'

        self.fg = podcast

        # Let every warning fire, but replace the printer with a no-op
        warnings.simplefilter("always")

        def _discard(*args, **kwargs):
            return None

        warnings.showwarning = _discard
Пример #24
0
    def get_podcast(self):
        """Build the RSS feed for ``self.album_id`` and write it to disk.

        Fetches the album info JSON, fills ``self.podcast`` from it, calls
        ``self.get_episode`` for every track on every listing page and finally
        writes ``./podcast/ximalaya/<album_id>.xml``.

        If the album info response carries a ``ret`` other than 200, a message
        is printed and nothing is written.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            # Without this guard execution used to fall through to rss_file()
            # with a podcast that was never built for this album.
            print("album info request for {} failed (ret={})".format(
                self.album_id, album_info['ret']))
            return

        album_info_data = album_info['data']
        main_info = album_info_data['mainInfo']

        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        if main_info['richIntro']:
            self.podcast.description = main_info['richIntro']
        self.podcast.language = 'cn'
        # The cover URL is protocol-relative; anything after '!' is dropped
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        # Links returned by get_episode(); only needed by the optional
        # plain-text dump mentioned at the end of this method.
        links = []
        # NOTE(review): one page beyond the computed count is requested, as
        # before; it looks redundant under true division - confirm.
        album_page_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
        page_num = 1
        while page_num <= album_page_count:
            webpage = tools.get_url(
                self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                _, link = self.get_episode(episode_info['trackId'])
                links.append(link)

            page_num += 1

        path = './podcast/ximalaya'
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(path, exist_ok=True)

        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # Optional link dump (disabled):
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), ''.join(links))
        print("「{}」が上手に焼きました".format(self.album_id))
Пример #25
0
    def generate_podcast(self, feed_name: str) -> str:
        """
        Create podcast XML from the ``.mp3`` files found (recursively) under
        ``<search_dir>/<feed_name>``. Based on
        https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html

        For each audio file, a sibling ``.json`` file with the same base name
        may supply ``title``, ``summary``, ``link``, ``authors``,
        ``publication_date`` and ``image``. Missing values fall back to the
        file itself (name, modification time, a sibling ``.jpg``/``.png``).
        When no metadata file exists yet, one is written so it can be edited
        later.

        :param feed_name: name of the feed and the sub-directory for files
        :return: the RSS document as a string
        """
        # Initialize the feed
        p = Podcast()

        # Required fields
        p.name = f'{feed_name} Archive'
        p.description = 'Stuff to listen to later'
        p.website = self.base_url
        p.complete = False

        # Optional
        p.language = 'en-US'
        p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
        p.explicit = False
        p.authors.append(Person("Anthology"))

        search_root = Path(self.search_dir)

        def public_url(file_path: Path) -> str:
            # str.lstrip(search_dir) strips a *character set*, not a prefix,
            # and could eat the start of the feed or file name; compute the
            # path relative to the search directory instead.
            relative = file_path.relative_to(search_root).as_posix()
            return f'{p.website}/{relative}'.replace(' ', '+')

        for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
            filepath = str(path)
            episode = p.add_episode()

            # Attempt to load saved metadata. with_suffix() only touches the
            # final extension, unlike str.replace('.mp3', ...) which would
            # also rewrite '.mp3' inside directory names.
            metadata_file_name = str(path.with_suffix('.json'))
            try:
                with open(metadata_file_name, encoding='utf-8') as metadata_file:
                    metadata = json.load(metadata_file)
            except FileNotFoundError:
                metadata = {}
            except JSONDecodeError:
                metadata = {}
                self.logger.error(f'Failed to read {metadata_file_name}')

            # Build the episode based on either the saved metadata or the file details.
            # path.stem replaces rstrip('.mp3'), which stripped any trailing
            # '.', 'm', 'p' or '3' characters (e.g. 'stamp.mp3' -> 'sta').
            episode.title = metadata.get('title', path.stem)
            episode.summary = metadata.get('summary',
                                           htmlencode('Some Summary'))
            if 'link' in metadata:
                episode.link = metadata.get('link')
            if 'authors' in metadata:
                episode.authors = [
                    Person(author) for author in metadata.get('authors')
                ]
            if 'publication_date' in metadata:
                episode.publication_date = isoparse(metadata.get('publication_date'))
            else:
                # Fall back to the file's modification time, in UTC
                episode.publication_date = datetime.fromtimestamp(
                    os.path.getmtime(filepath), tz=pytz.utc)
            episode.media = Media(public_url(path), os.path.getsize(filepath))
            episode.media.populate_duration_from(filepath)

            if "image" in metadata:
                episode.image = metadata.get('image')
            else:
                # Use the first sibling image that exists, preferring .jpg
                for ext in ('.jpg', '.png'):
                    image_path = path.with_suffix(ext)
                    if os.path.isfile(image_path):
                        episode.image = public_url(image_path)
                        break

            # Save the metadata for future editing
            if not os.path.exists(metadata_file_name):
                metadata = {
                    'title': episode.title,
                    'summary': episode.summary,
                    'publication_date': episode.publication_date,
                    'authors': episode.authors
                }
                # default=str stringifies values json cannot encode natively
                with open(metadata_file_name, 'w', encoding='utf-8') as outFile:
                    json.dump(metadata, outFile, indent=2, default=str)

        return p.rss_str()
Пример #26
0
    def album(self):
        """Fetch the album ``self.album_id`` and write its RSS feed to disk.

        Requests the album info, and only when its ``ret`` field equals 200
        builds ``self.podcast``, walks the paged track listing, adds one
        episode per track (fetching each track's detail document) and writes
        ``ximalaya/<album_id>.rss``.  A failure on a single track is printed
        with its traceback and the loop carries on.
        """
        raw_info = requests.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        info = json.loads(raw_info.decode('utf-8'))
        if info['ret'] != 200:
            return

        data = info['data']

        # Initialise the feed-level metadata
        self.podcast = Podcast()
        main_info = data['mainInfo']
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        # Fall back to the album title when there is no rich introduction
        self.podcast.description = main_info['richIntro'] or self.podcast.name
        self.podcast.language = 'cn'
        # Cover URL is protocol-relative; anything after '!' is dropped
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # The original note here reads "py2 +1"; presumably the extra page
        # compensates for Python 2 floor division - TODO confirm.
        last_page = math.ceil(
            data['tracksInfo']['trackTotalCount'] / self.page_size) + 1
        page = 1
        while page <= last_page:
            raw_listing = requests.get(
                self.album_list_url.format(self.album_id, page,
                                           self.page_size),
                headers=self.header).content
            listing = json.loads(raw_listing.decode('utf-8'))
            tracks = listing['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                try:
                    raw_detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(raw_detail.decode('utf-8'))

                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])

                    # Use the track cover only when it is a .png/.jpg,
                    # otherwise reuse the album cover
                    image = each['trackCoverPath'].split('!')[0]
                    episode.image = (
                        'https:' + image
                        if image[-4:] in ('.png', '.jpg')
                        else self.podcast.image)

                    episode.summary = (
                        detail_content['intro'].replace('\r\n', '')
                        if 'intro' in detail_content
                        else each['trackName'])

                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    # NOTE(review): the track's 'duration' goes in as Media's
                    # second positional argument - confirm that is intended.
                    episode.media = Media(each['src'], each['duration'])
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Report the failing track and continue with the next one
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            page = page + 1

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                              minimize=True)
def lambda_handler(event, context):
    """Lambda entry point: rebuild the podcast RSS feed and upload it to S3.

    Episodes come from ``query_episodes()`` and are sorted by ``episode-num``.
    The first record after sorting supplies the feed-level metadata and is
    skipped as an episode when its number is 0.  For episodes without a
    ``media-file`` entry the MP3 is downloaded from S3 to ``/tmp``, measured
    (duration and size), stored via ``update_episode`` and added to the
    in-memory record.  The feed is written to ``/tmp/podcast.rss`` and
    uploaded to ``ccc/podcast.rss``.

    ``event`` and ``context`` are not used.  Returns ``None``; the function
    also returns early, without writing a feed, if a downloaded file cannot
    be opened as an MP3.
    """
    print('Starting cccRssBuilder Lambda function')
    # Get episodes from DynamoDB
    episodes = query_episodes()
    episodes.sort(key=lambda x: x['episode-num'])

    # One client serves both the media downloads and the final upload
    s3 = boto3.client('s3')

    # Create the podcast feed
    # Main podcast info comes from "episode 0"
    episodeInfo = episodes[0]
    p = Podcast()
    p.name = episodeInfo['name']
    p.description = episodeInfo['description']
    p.website = episodeInfo['website']
    p.explicit = episodeInfo['explicit']
    p.image = episodeInfo['image']
    p.feed_url = episodeInfo['feed-url']
    p.language = episodeInfo['language']
    p.category = Category(episodeInfo['category'], episodeInfo['subcategory'])
    p.owner = Person(episodeInfo['owner-name'], episodeInfo['owner-email'])
    p.authors = [Person(episodeInfo['owner-name'], episodeInfo['owner-email'])]

    # Process each episode
    for episode in episodes:
        # Skip "Episode 0"
        if episode['episode-num'] == 0:
            continue
        # Check if episode contains media file info (name, duration, size).
        # If not, add it to db and episode object.
        if 'media-file' not in episode:
            episodeNum = episode['episode-num']
            print('Analyzing media file for episode', episodeNum)
            mediaFile = 'ccc-{:03d}-{}.mp3'.format(int(episodeNum),
                                                   episode['pub-date'])
            print('Media file:', mediaFile)
            localMediaFile = '/tmp/' + mediaFile
            s3.download_file('kwksolutions.com', 'ccc/media/' + mediaFile,
                             localMediaFile)

            # Try to analyze the mp3 file - looking for duration and file size
            try:
                audio = MP3(localMediaFile)
            except Exception as exc:
                # Narrowed from a bare except so SystemExit and
                # KeyboardInterrupt are no longer swallowed
                print('Not an MP3 file!', exc)
                return
            duration = round(audio.info.length)
            hours, remainder = divmod(duration, 3600)
            minutes, seconds = divmod(remainder, 60)
            if hours == 0:
                durationStr = '{:02d}:{:02d}'.format(minutes, seconds)
            else:
                durationStr = '{:02d}:{:02d}:{:02d}'.format(
                    hours, minutes, seconds)
            size = str(os.path.getsize(localMediaFile))
            update_episode(episodeNum, mediaFile, size, durationStr)
            episode['media-file'] = mediaFile
            episode['size'] = size
            episode['duration'] = durationStr

        # Figure out all the info needed for the episode object
        mediaURL = 'https://www.kwksolutions.com/ccc/media/' + episode[
            'media-file']
        # Duration is stored as "MM:SS" or "HH:MM:SS"
        durationList = episode['duration'].split(':')
        secs = int(durationList[-1])
        mins = int(durationList[-2])
        try:
            h = int(durationList[-3])
        except (IndexError, ValueError):
            # No usable hours field
            h = 0
        # pub-date is split on '-' into year, month, day
        pubdateList = episode['pub-date'].split('-')
        year = int(pubdateList[0])
        month = int(pubdateList[1])
        day = int(pubdateList[2])

        # Build the episode object
        e = p.add_episode()
        e.id = mediaURL
        e.title = 'Episode ' + str(episode['episode-num'])
        e.summary = episode['description']
        e.link = 'http://christcommunitycarmel.org/get-involved/podcasts'
        # Published at 12:00:00 in the 'EST' zone on the pub-date
        e.publication_date = datetime.datetime(year,
                                               month,
                                               day,
                                               12,
                                               0,
                                               0,
                                               tzinfo=pytz.timezone('EST'))
        e.media = Media(mediaURL,
                        episode['size'],
                        duration=datetime.timedelta(hours=h,
                                                    minutes=mins,
                                                    seconds=secs))

    # Write the rss file
    print('Writing RSS file to S3')
    rssLocalFile = '/tmp/podcast.rss'
    rssS3File = 'ccc/podcast.rss'
    p.rss_file(rssLocalFile)
    s3.upload_file(rssLocalFile,
                   'kwksolutions.com',
                   rssS3File,
                   ExtraArgs={'ContentType': 'text/xml'})

    return
Пример #28
0

# Download the page at base_url and parse it with the lxml parser.
content = requests.get(base_url).content

soup = BeautifulSoup(content, features="lxml")


# Collect the targets of the first ten anchors inside #listProgramsContent,
# each prefixed with base_href.
urls_to_follow = []
for anchor in soup.select("#listProgramsContent a")[:10]:
    urls_to_follow.append(base_href + anchor.get("href"))


# Feed-level metadata for the generated podcast.
p = Podcast(
   name="Alta Tensão",
   description="Alta Tensão com António Freitas",
   image="https://cdn-images.rtp.pt/EPG/radio/imagens/1068_10159_53970.jpg",
   website=base_url,
   explicit=True,
)

episodes = []

# Visit every collected page and extract title, date and media URL.
for url in urls_to_follow:
    content = requests.get(url).content
    soup = BeautifulSoup(content, features="lxml")
    # The media URL is read from a `file : "...",` fragment (followed by a
    # newline) in the raw page bytes.
    # NOTE(review): `res` is None when the pattern is absent, which would make
    # the .groups() call below fail - confirm pages always contain it.
    res = re.search(b'file : "(.+?)",\\n', content)
    title = soup.select("b.vod-title")[0].text
    date = soup.select(".vod-data p span.episode-date")[0].text
    media_url = res.groups()[0].decode()
    # NOTE(review): the HEAD request targets the page URL, not media_url -
    # confirm which one is intended.
    head = requests.head(url)
    if '\n' in title:
Пример #29
0
    if next((x for x in session_items if x['CID'] == cid), None):
        print(f'WARNING: duplicate CID {cid} for new item: {title}')

# Write the new sessions JSON file: newly added items go first, followed by
# the previously known ones.
updated_session_items = new_items + session_items

# Every item gets a link built from its CID and the configured prefix/suffix.
for item in updated_session_items:
    item['link'] = f'{ipfs_prefix}{item["CID"]}{ipfs_suffix}'

with open(sessions_filename, 'w') as outfile:
    json.dump(updated_session_items, outfile, indent=2)

print('>>> wrote fresh sessions.json file')

# Write the new RSS file: feed-level metadata first.
p = Podcast()

p.name = "The Objectivism Seminar"
# The category name must be the literal "Society & Culture"; the
# HTML-escaped form "Society &amp; Culture" is not a known category.
p.category = Category("Society & Culture", "Philosophy")
p.language = "en-US"
p.explicit = True
p.description = (
    "A weekly online conference call to systematically study " +
    "the philosophy of Objectivism via the works of prominent Rand scholars.")
p.website = "https://www.ObjectivismSeminar.com"
p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg"
p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss"
p.authors = [Person("Greg Perkins, Host", "*****@*****.**")]
p.owner = Person("Greg Perkins", "*****@*****.**")

p.episodes += [
Пример #30
0
    def setUp(self):
        """Switch to the 'C' locale and build a fully populated podcast fixture.

        The previous locale setting is kept in ``self.existing_locale``.  Every
        expected value is stored on the test case and then copied onto a fresh
        ``Podcast``, which ends up in ``self.fg``.  Warnings are set to always
        fire, but their output is replaced with a no-op.
        """
        self.existing_locale = locale.setlocale(locale.LC_ALL, None)
        locale.setlocale(locale.LC_ALL, 'C')

        podcast = Podcast()

        # XML namespaces and the feed's own URL
        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        self.name = 'Some Testfeed'

        # Use character not in ASCII to catch encoding errors
        self.author = Person('Jon Døll', '*****@*****.**')

        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'

        self.language = 'en'

        # Pieces of the cloud setting
        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = dict(name="Contributor Name",
                                email='Contributor email')
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = {'Tuesday'}
        self.skip_hours = {23}

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        # Copy the expected values onto the podcast, in the same order the
        # attributes were assigned originally.
        for attr in ('name', 'website', 'description', 'subtitle', 'language'):
            setattr(podcast, attr, getattr(self, attr))

        podcast.cloud = (
            self.cloudDomain,
            self.cloudPort,
            self.cloudPath,
            self.cloudRegisterProcedure,
            self.cloudProtocol,
        )

        for attr in ('pubsubhubbub', 'copyright'):
            setattr(podcast, attr, getattr(self, attr))

        podcast.authors.append(self.author)

        for attr in ('skip_days', 'skip_hours', 'web_master', 'feed_url',
                     'explicit', 'image', 'owner', 'complete',
                     'new_feed_url', 'xslt'):
            setattr(podcast, attr, getattr(self, attr))

        self.fg = podcast

        # Let every warning fire, but replace the printer with a no-op
        warnings.simplefilter("always")

        def _discard(*args, **kwargs):
            return None

        warnings.showwarning = _discard