Exemplo n.º 1
0
    def test_mandatoryValues(self):
        """Check that RSS generation fails when a mandatory property is unset.

        One Podcast is created per mandatory property. Each time every
        mandatory value except the one under test is assigned, so
        ``_create_rss`` is expected to raise ValueError on every pass.
        """
        # (label used in the failure message, attribute name shared by the
        # Podcast and the fixture values stored on this test case)
        mandatory = (
            ("description", "description"),
            ("title", "name"),
            ("link", "website"),
            ("explicit", "explicit"),
        )

        for omitted, _ in mandatory:
            fg = Podcast()
            for label, attribute in mandatory:
                if label != omitted:
                    setattr(fg, attribute, getattr(self, attribute))
            try:
                self.assertRaises(ValueError, fg._create_rss)
            except AssertionError as e:
                # Name the property that was left out so the failing
                # iteration can be identified.
                raise_from(AssertionError(
                    "The test failed for %s" % omitted), e)
Exemplo n.º 2
0
 def test_constructor(self):
     """Configure the Podcast through constructor keywords, then run test_baseFeed."""
     # Keyword arguments, collected in the order the fixture values are read.
     constructor_kwargs = dict(
         name=self.name,
         website=self.website,
         description=self.description,
         subtitle=self.subtitle,
         language=self.language,
         cloud=(self.cloudDomain, self.cloudPort, self.cloudPath,
                self.cloudRegisterProcedure, self.cloudProtocol),
         pubsubhubbub=self.pubsubhubbub,
         copyright=self.copyright,
         authors=[self.author],
         skip_days=self.skip_days,
         skip_hours=self.skip_hours,
         web_master=self.web_master,
         feed_url=self.feed_url,
         explicit=self.explicit,
         image=self.image,
         owner=self.owner,
         complete=self.complete,
         new_feed_url=self.new_feed_url,
         xslt=self.xslt,
     )
     # Replaces the fg created by setUp.
     self.fg = Podcast(**constructor_kwargs)
     # Reuse test_baseFeed to check that the fields are actually set.
     self.test_baseFeed()
Exemplo n.º 3
0
    def album(self):
        """Scrape the album page and write the resulting feed to an RSS file.

        The page at ``self.url`` supplies the podcast metadata and the list
        of sound ids; ``self.detail`` is called once per sound id before the
        feed is written to ``ximalaya/<album_id>.rss``.
        """
        response = requests.get(self.url, headers=self.header)
        soup = BeautifulSoup(response.content, "lxml")

        # Initialise the podcast-level metadata.
        self.podcast = Podcast()
        feed = self.podcast
        feed.name = soup.find('div', 'detailContent_title').get_text()
        feed.authors.append(Person("Powered by forecho", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = soup.find('div', 'mid_intro').get_text()
        feed.language = 'cn'
        # The cover URL carries a '!'-separated suffix; keep the part before it.
        cover_src = soup.find('a', 'albumface180').find('img').get('src')
        feed.image = cover_src.split('!')[0]
        feed.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("forecho", '*****@*****.**')

        # The sound ids are stored as a comma-separated attribute.
        id_list = soup.find('div', class_='personal_body').get('sound_ids')

        for sound_id in id_list.split(','):
            entry = soup.find('li', sound_id=sound_id)
            date = entry.find('div', class_='operate').get_text().strip()
            self.detail(sound_id, date)

        # Generate the file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
Exemplo n.º 4
0
    def album(self):
        """Build the podcast from the two API responses and write it to disk.

        ``self.album_info_api`` supplies the podcast metadata and
        ``self.album_list_api`` the episodes; the feed is written to
        ``qingting/<album_id>.rss``.
        """
        album_info_data = json.loads(requests.get(self.album_info_api).content)
        album_list_data = json.loads(requests.get(self.album_list_api).content)

        self.podcast = Podcast()
        feed = self.podcast
        info = album_info_data['data']
        feed.name = info['title']
        feed.authors.append(Person("Powered by maijver", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = info['description']
        feed.language = 'cn'
        # The same cover (thumbnail URL without its '!200' marker) is used
        # for the podcast and for every episode.
        cover = info['thumbs']['small_thumb'].replace('!200', '')
        feed.image = cover
        feed.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = feed.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(self.podcast.name + '=====' + each['title'])
            episode.image = cover
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            media_path = each['mediainfo']['bitrates_url'][0]['file_path']
            episode.media = Media("http://od.qingting.fm/{}".format(media_path),
                                  each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)
Exemplo n.º 5
0
def main():
    """Print an RSS feed built from the episodes listed in thebugle.json.

    Each entry provides ``id``, ``title``, ``file`` and a ``date`` whose
    first three '-'-separated fields are year, month and day.
    """
    with open('thebugle.json') as f:
        episodes = json.load(f)

    feed = Podcast(
        name="TimesOnLine Bugle Archive",
        description="Old Bugle episodes, podcast feed",
        website="https://www.thebuglepodcast.com/",
        explicit=False,
    )

    for entry in episodes:
        title = f"{entry['id']}: {entry['title']}"
        ep = feed.add_episode(Episode(title=title))

        media_url = f"{MEDIA_BASE_URL}/{entry['file']}"
        ep.media = Media.create_from_server_response(media_url)
        ep.media.fetch_duration()

        # Published at midnight UTC of the given day.
        parts = entry['date'].split('-')
        ep.publication_date = datetime(int(parts[0]), int(parts[1]),
                                       int(parts[2]), tzinfo=pytz.utc)

    print(feed.rss_str())
Exemplo n.º 6
0
def generate_podcast_xml(podcasts):
    """Return the RSS document of a Podcast configured from ``config``.

    ``podcasts`` is assigned as the podcast's episode collection.
    """
    settings = {
        'name': config.PODCAST_NAME,
        'description': config.PODCAST_DESCRIPTION,
        'website': config.PODCAST_WEBSITE,
        'explicit': config.PODCAST_CONTAINS_EXPLICIT_CONTENT,
        'withhold_from_itunes': True,
    }
    feed = Podcast(**settings)
    feed.episodes = podcasts
    return feed.rss_str()
Exemplo n.º 7
0
    def album(self):
        """Scrape the album page and the track-list API, then write the feed.

        Podcast metadata comes from the HTML at ``self.album_url``; episodes
        come from ``self.album_list_api`` plus one detail request per track.
        A track whose processing raises is reported on stdout and the loop
        continues. The feed is written to ``ximalaya/<album_id>.rss``.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast-level metadata.
        self.podcast = Podcast()
        feed = self.podcast
        feed.name = soup.find('h1', 'title').get_text()
        feed.authors.append(Person("Powered by forecho", '*****@*****.**'))
        feed.website = self.album_url
        feed.copyright = 'cc-by'
        # Fall back to the podcast name when the page has no intro text.
        intro = soup.find('div', 'album-intro')
        intro_text = intro.get_text() if intro else None
        feed.description = intro_text if intro_text else feed.name
        feed.language = 'cn'
        cover_src = soup.find('div', 'album-info').find('img').get('src')
        feed.image = cover_src.split('!')[0]
        feed.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("forecho", '*****@*****.**')

        listing = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(listing.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                item = json.loads(requests.get(detail_url, headers=self.header).content)

                episode = feed.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])
                # .gif / .bmp track covers are replaced by the podcast cover.
                image = each['trackCoverPath'].split('!')[0]
                episode.image = feed.image if image[-4:] in ('.gif', '.bmp') else image
                # Use the track name when the detail record has no intro.
                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                # Positions run opposite to the track index.
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

        # Generate the file.
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
Exemplo n.º 8
0
    def test_removeEntryByIndex(self):
        """Popping index 0 after adding one episode leaves the list empty."""
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'
        podcast = Podcast()

        episode = podcast.add_episode()
        episode.id = 'http://lernfunk.de/media/654321/1'
        episode.title = 'The Third BaseEpisode'

        # One episode before the removal, none afterwards.
        assert len(podcast.episodes) == 1
        podcast.episodes.pop(0)
        assert len(podcast.episodes) == 0
Exemplo n.º 9
0
    def test_removeEntryByIndex(self):
        """Remove the only episode by index and check the list becomes empty."""
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'
        feed = Podcast()

        entry = feed.add_episode()
        entry.id = 'http://lernfunk.de/media/654321/1'
        entry.title = 'The Third BaseEpisode'

        # The list holds exactly the episode that was just added ...
        assert len(feed.episodes) == 1
        # ... and nothing once index 0 has been popped.
        feed.episodes.pop(0)
        assert len(feed.episodes) == 0
Exemplo n.º 10
0
def generate_podcast_xml(base, books):
    """Write an RSS feed for the given audiobook files to skeleton/rss.xml.

    Args:
        base: URL prefix; each media URL is ``base + '/files/' + book_name``.
        books: Iterable of audiobook file names. Their sizes are read from
            the local ``files/`` directory.
    """
    from podgen import Podcast, Episode, Media

    p = Podcast()

    p.name = "AeonNeo's Audiobooks"
    p.description = "Description"
    p.website = "www.yangvincent.com"
    p.explicit = False

    # Create one episode per book.
    for book_name in books:
        ep = Episode()
        # Strip whatever extension the file name has instead of assuming
        # that it is exactly four characters long.
        ep.title = os.path.splitext(book_name)[0]
        full_path = base + '/files/' + book_name
        dev_path = 'files/' + book_name
        try:
            book_size = os.path.getsize(dev_path)
        except OSError as e:
            # The local file is missing or unreadable: report it and
            # publish the episode with a size of 0.
            print(e)
            book_size = 0

        ep.media = Media(full_path, type='audio/mp4a', size=book_size)
        p.episodes.append(ep)

    # Generate the RSS file.
    p.rss_file('skeleton/rss.xml', minimize=True)
Exemplo n.º 11
0
class Qingting(object):
    """Builds a podcast feed for one Qingting album."""

    def __init__(self, album_id):
        """Store the album id and derive the page and API URLs from it."""
        self.podcast = None
        self.album_id = album_id
        self.url = 'http://www.qingting.fm/channels/{}'.format(album_id)
        self.album_list_api = "http://api2.qingting.fm/v6/media/channelondemands/{}/programs/order/0/curpage/1/pagesize/100".format(
            album_id)
        self.album_info_api = "http://api2.qingting.fm/v6/media/channelondemands/{}".format(album_id)

    def album(self):
        """Build the podcast from the two API responses and write it to disk.

        The feed is written to ``qingting/<album_id>.rss``.
        """
        album_info_data = json.loads(requests.get(self.album_info_api).content)
        album_list_data = json.loads(requests.get(self.album_list_api).content)

        self.podcast = Podcast()
        feed = self.podcast
        info = album_info_data['data']
        feed.name = info['title']
        feed.authors.append(Person("Powered by maijver", '*****@*****.**'))
        feed.website = self.url
        feed.copyright = 'cc-by'
        feed.description = info['description']
        feed.language = 'cn'
        # The same cover (thumbnail URL without its '!200' marker) is used
        # for the podcast and for every episode.
        cover = info['thumbs']['small_thumb'].replace('!200', '')
        feed.image = cover
        feed.feed_url = 'http://podcast.forecho.com/qingting/%s.rss' % self.album_id
        feed.category = Category('Technology', 'Podcasting')
        feed.explicit = False
        feed.complete = False
        feed.owner = Person("maijver", '*****@*****.**')

        for each in album_list_data['data']:
            episode = feed.add_episode()
            episode.id = str(each['id'])
            episode.title = each['title']
            print(episode.title)
            episode.image = cover
            episode.summary = each['title']
            episode.link = 'http://www.qingting.fm/channels/{}/programs/{}'.format(self.album_id, each['id'])
            episode.authors = [Person("forecho", '*****@*****.**')]
            episode.publication_date = self.reduction_time(each['update_time'])
            media_path = each['mediainfo']['bitrates_url'][0]['file_path']
            episode.media = Media("http://od.qingting.fm/{}".format(media_path),
                                  each['duration'])

        self.podcast.rss_file('qingting/{}.rss'.format(self.album_id), minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse "YYYY-mm-dd HH:MM:SS" into a UTC datetime with seconds set to 0."""
        parsed = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return parsed.replace(second=0, tzinfo=pytz.utc)
Exemplo n.º 12
0
def main(event, context):
    """Return the "Sem Servidor" RSS feed wrapped in an HTTP-style response.

    Every item of the ``semservidor-dev`` DynamoDB table becomes one
    episode of the feed.

    Args:
        event: Not used.
        context: Not used.

    Returns:
        dict: ``statusCode`` 200, an ``application/xml`` content-type
        header and the RSS document as ``body``.
    """
    dynamodb = boto3.resource('dynamodb', region_name='sa-east-1')
    table = dynamodb.Table('semservidor-dev')

    # A single scan call returns one page of results only; keep following
    # LastEvaluatedKey so that no episode is silently dropped.
    page = table.scan()
    items = list(page['Items'])
    while 'LastEvaluatedKey' in page:
        page = table.scan(ExclusiveStartKey=page['LastEvaluatedKey'])
        items.extend(page['Items'])

    author = Person("Evandro Pires da Silva", "*****@*****.**")
    p = Podcast(
        name="Sem Servidor",
        description=
        "Podcast dedicado a arquitetura serverless, com conteúdo de qualidade em português.",
        website="https://semservidor.com.br",
        explicit=False,
        copyright="2020 Evandro Pires da Silva",
        # Brazilian Portuguese; the previous value "pr-BR" was a typo.
        language="pt-BR",
        authors=[author],
        feed_url=
        "https://3tz8r90j0d.execute-api.sa-east-1.amazonaws.com/dev/podcasts/rss",
        category=Category("Music", "Music History"),
        owner=author,
        image="http://d30gvsirhz3ono.cloudfront.net/logo_semservidor_teste.jpg",
        web_master=Person(None, "*****@*****.**"))

    base_url = "http://d30gvsirhz3ono.cloudfront.net/"
    for item in items:
        info = item['info']
        file_path = base_url + info['arquivo']['nome']
        p.episodes.append(
            Episode(title=info['episodio'],
                    media=Media(file_path, int(info['arquivo']['tamanho'])),
                    summary=info['descricao'],
                    position=int(item['id'])))

    # Order the episodes by the positions assigned above.
    p.apply_episode_order()
    rss = p.rss_str()

    return {
        "statusCode": 200,
        "headers": {
            "content-type": "application/xml"
        },
        "body": rss
    }
Exemplo n.º 13
0
 def test_constructor(self):
     """Create the Podcast from keyword arguments and re-run test_baseFeed."""
     # Every fixture value is handed to the constructor instead of being
     # assigned attribute by attribute.
     options = {
         'name': self.name,
         'website': self.website,
         'description': self.description,
         'subtitle': self.subtitle,
         'language': self.language,
         'cloud': (self.cloudDomain, self.cloudPort, self.cloudPath,
                   self.cloudRegisterProcedure, self.cloudProtocol),
         'pubsubhubbub': self.pubsubhubbub,
         'copyright': self.copyright,
         'authors': [self.author],
         'skip_days': self.skip_days,
         'skip_hours': self.skip_hours,
         'web_master': self.web_master,
         'feed_url': self.feed_url,
         'explicit': self.explicit,
         'image': self.image,
         'owner': self.owner,
         'complete': self.complete,
         'new_feed_url': self.new_feed_url,
         'xslt': self.xslt,
     }
     # Replaces the fg created by setUp.
     self.fg = Podcast(**options)
     # Reuse test_baseFeed to check that the fields are actually set.
     self.test_baseFeed()
Exemplo n.º 14
0
Arquivo: app.py Projeto: candyer/Flask
def index():
    """Serve the "ambience" podcast feed as XML.

    One episode is created per (size, url) pair returned by
    ``get_temporary_link``. podgen documentation:
    https://podgen.readthedocs.io/en/latest/
    """
    urls = get_temporary_link()

    feed = Podcast()
    feed.name = "ambience"
    feed.description = "ambience"
    feed.website = "LINK HERE"
    feed.explicit = True

    # Episodes are numbered from 1 in their titles.
    for number, (size, url) in enumerate(urls, start=1):
        track = Episode()
        track.title = "ambience music {}".format(number)
        track.media = Media(url, size=size, type="audio/mpeg")
        feed.episodes.append(track)

    return Response(str(feed), mimetype='text/xml')
Exemplo n.º 15
0
    def test_mandatoryValues(self):
        """Expect ValueError from _create_rss whenever a mandatory value is missing.

        A new Podcast is built for each mandatory property with that single
        property left unset and all other mandatory ones filled in.
        """
        # Pairs of (name reported on failure, attribute assigned on the
        # Podcast and read from this test case).
        required = (
            ("description", "description"),
            ("title", "name"),
            ("link", "website"),
            ("explicit", "explicit"),
        )

        for test_property, _ in required:
            fg = Podcast()
            # Assign everything except the property under test.
            for reported_name, attr in required:
                if reported_name != test_property:
                    setattr(fg, attr, getattr(self, attr))
            try:
                self.assertRaises(ValueError, fg._create_rss)
            except AssertionError as e:
                # Add the property name to the failure before re-raising.
                raise_from(
                    AssertionError("The test failed for %s" % test_property),
                    e)
Exemplo n.º 16
0
def rssfeed(request, programid):
    """Build the RSS feed for the program identified by its id (int).

    1. Fetches all episodes of the program from the digas db.
    2. Gets the program info from the app db.
    3. Uses podgen to do the actual XML generation.
    """
    program_id = int(programid)
    # Only these columns are loaded for each episode row.
    wanted_fields = ('program', 'title', 'remark', 'author',
                     'createdate', 'broadcastdate', 'filename',
                     'filesize', 'duration', 'softdel')
    podcasts = (DigasPodcast.objects.using('digas')
                .filter(softdel=0, program=program_id)
                .only(*wanted_fields)
                .order_by('-createdate'))
    programinfo = ProgramInfo.objects.get(programid=program_id)

    # globalsettings is loaded here rather than at module level so that
    # django does not fail on a missing constance_config table when
    # starting from scratch or setting up a new environment.
    from .models import globalsettings

    podcast_options = dict(
        name=programinfo.name,
        subtitle=programinfo.subtitle,
        description=programinfo.description,
        website=feed_url(programid),  # programinfo.website,
        explicit=programinfo.explicit,
        category=Category(programinfo.category),
        authors=[globalsettings.owner],
        language=programinfo.language,
        owner=globalsettings.owner,
        feed_url=feed_url(programid),
        new_feed_url=feed_url(programid),
        image=programinfo.image_url,
    )
    p = Podcast(**podcast_options)

    for row in podcasts:
        # Publication date derived from createdate or broadcastdate.
        pubdate = digas2pubdate(row.createdate, row.broadcastdate)
        entry = Episode(
            title=row.title,
            media=Media(mp3url(row.filename), row.filesize),
            link=mp3url(row.filename),  # multifeedreader uses this.
            id=guid(row.filename),
            summary=row.remark,
            publication_date=pubdate)
        p.episodes.append(entry)

    # Send it as unicode.
    return HttpResponse(u'%s' % p, content_type='application/xml')
Exemplo n.º 17
0
    def get_podcast(self):
        """Fetch the album metadata and episodes and write the feed to disk.

        The feed is written to ``./podcast/ximalaya/<album_id>.xml``.

        Raises:
            RuntimeError: if the album-info response does not report
                ``ret == 200``.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            # Without this guard execution fell through to rss_file() on a
            # podcast that this call never populated.
            raise RuntimeError(
                "album {} could not be fetched (ret={})".format(
                    self.album_id, album_info['ret']))

        album_info_data = album_info['data']

        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo']['richIntro']
        self.podcast.language = 'cn'
        self.podcast.image = 'https:' + album_info_data['mainInfo']['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        # The collected links are only consumed by the save_m4a call at the
        # bottom, which is currently disabled.
        text = ''
        # NOTE(review): the "+ 1" requests one page beyond
        # ceil(total / page size); looks like an off-by-one - confirm
        # before changing.
        album_page_count = math.ceil(album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
        for page_num in range(1, album_page_count + 1):
            webpage = tools.get_url(self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                                    self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                _, link = self.get_episode(episode_info['trackId'])
                text += link

        path = './podcast/ximalaya'
        # exist_ok avoids the race between checking for the directory and
        # creating it.
        os.makedirs(path, exist_ok=True)

        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        # tools.save_m4a(os.path.join(path, '{}.txt'.format(self.album_id)), text)
        print("「{}」が上手に焼きました".format(self.album_id))
Exemplo n.º 18
0
    def setUp(self):
        """Create a Podcast fixture with three episodes.

        The feed is stored as ``self.fg`` and its first episode as
        ``self.fe``. ``warnings.showwarning`` is replaced by a no-op.
        """
        self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
        self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

        # Values of the mandatory podcast properties.
        self.title = 'Some Testfeed'
        self.link = 'http://lernfunk.de'
        self.description = 'A cool tent'
        self.explicit = False

        feed = Podcast()
        feed.name = self.title
        feed.website = self.link
        feed.description = self.description
        feed.explicit = self.explicit

        first = feed.add_episode()
        first.id = 'http://lernfunk.de/media/654321/1'
        first.title = 'The First Episode'
        self.fe = first

        # Also add an episode through the list directly.
        second = Episode()
        feed.episodes.append(second)
        second.id = 'http://lernfunk.de/media/654321/1'
        second.title = 'The Second Episode'

        third = feed.add_episode()
        third.id = 'http://lernfunk.de/media/654321/1'
        third.title = 'The Third Episode'

        self.fg = feed

        # Let every warning through the filter, but display none of them.
        warnings.simplefilter("always")
        warnings.showwarning = lambda *args, **kwargs: None
Exemplo n.º 19
0
def scrape_morning_edition(web_session=None, params=params):
    """Return the RSS feed of NPR Morning Edition as a non-minimized string.

    Args:
        web_session: Session handed to ``scrape``. When omitted, a new
            ``requests_html.HTMLSession`` is created for this call and
            closed afterwards, instead of one session being created at
            definition time and shared by every call.
        params: Passed through to ``scrape``; defaults to the module-level
            ``params`` as it was when this function was defined.

    Returns:
        The value of ``podcast.rss_str(minimize=False)`` after ``scrape``
        has processed the podcast.
    """
    owns_session = web_session is None
    if owns_session:
        web_session = requests_html.HTMLSession()

    podcast = Podcast()
    podcast.name = "NPR Morning Edition"
    podcast.description = \
        """Every weekday for over three decades, Morning Edition has taken
        listeners around the country and the world with two hours of multi-faceted
        stories and commentaries that inform, challenge and occasionally amuse.
        Morning Edition is the most listened-to news radio program in the country."""
    podcast.website = "https://www.npr.org/programs/morning-edition"
    podcast.explicit = False

    try:
        scrape(web_session, params, 'morning-edition', podcast)
        return podcast.rss_str(minimize=False)
    finally:
        # Only close a session created here; a caller-supplied session
        # stays under the caller's control.
        if owns_session:
            web_session.close()
Exemplo n.º 20
0
def rss(url_token):
    """Return the podcast feed belonging to ``url_token`` as an XML response.

    The token is resolved to a Dropbox access token plus the feed title and
    description; every (size, url, uid, name) tuple returned by
    ``get_temporary_link`` becomes one episode.
    """
    dropbox_access_token, title, description = get_the_latest_token_info(
        url_token)
    urls = get_temporary_link(dropbox_access_token)

    feed = Podcast()
    feed.name = title
    feed.description = description
    feed.website = "https://www.google.com"
    feed.explicit = True

    for size, url, uid, name in urls:
        track = Episode()
        # The file name without its extension serves as the title.
        track.title = os.path.splitext(name)[0]
        track.id = uid
        track.media = Media(url, size=size, type="audio/mpeg")
        feed.episodes.append(track)

    return Response(str(feed), mimetype='text/xml')
Exemplo n.º 21
0
    def setUp(self):
        """Build the three-episode Podcast used by the tests.

        Stores the feed as ``self.fg`` and its first episode as ``self.fe``,
        and replaces ``warnings.showwarning`` with a no-op.
        """
        self.itunes_ns = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
        self.dublin_ns = 'http://purl.org/dc/elements/1.1/'

        # Values of the mandatory podcast properties.
        self.title = 'Some Testfeed'
        self.link = 'http://lernfunk.de'
        self.description = 'A cool tent'
        self.explicit = False

        podcast = Podcast()
        podcast.name = self.title
        podcast.website = self.link
        podcast.description = self.description
        podcast.explicit = self.explicit

        # First episode, created through add_episode().
        episode = podcast.add_episode()
        episode.id = 'http://lernfunk.de/media/654321/1'
        episode.title = 'The First Episode'
        self.fe = episode

        # Second episode, appended to the list directly.
        episode = Episode()
        podcast.episodes.append(episode)
        episode.id = 'http://lernfunk.de/media/654321/1'
        episode.title = 'The Second Episode'

        # Third episode, again through add_episode().
        episode = podcast.add_episode()
        episode.id = 'http://lernfunk.de/media/654321/1'
        episode.title = 'The Third Episode'

        self.fg = podcast

        # Let every warning through the filter, but display none of them.
        warnings.simplefilter("always")
        warnings.showwarning = lambda *args, **kwargs: None
Exemplo n.º 22
0
def genero_feed(puntateList):
    """Write the "Pascal Rai Radio 2" feed for the given episodes to ``rssfile``.

    Nothing is written when ``puntateList`` is empty. Each entry is indexed
    as: 0 title, 1 link, 2 date as "day/month/year" (may be empty),
    3 media URL, 4 media size.
    """
    if not puntateList:
        return

    # Create a new podcast.
    p = Podcast()

    p.name = "Pascal Rai Radio 2"
    p.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    p.website = "http://www.raiplayradio.it/programmi/pascal/"
    p.explicit = True
    p.image = "https://rss.draghetti.it/pascal_image.jpg"
    p.feed_url = "https://rss.draghetti.it/pascal.xml"
    p.copyright = "Rai Radio 2"
    p.language = "it-IT"

    for puntata in puntateList:
        episode = Episode()

        episode.title = puntata[0].encode("ascii", "ignore")
        episode.link = puntata[1]

        # The file size is approximate.
        episode.media = Media(puntata[3], puntata[4])

        data = puntata[2]
        if data:
            # Published at 20:00 UTC on the given day.
            parts = data.split("/")
            episode.publication_date = datetime.datetime(
                int(parts[2]), int(parts[1]), int(parts[0]), 20, 0,
                tzinfo=pytz.utc)
        else:
            # No date available: use the current time.
            episode.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

        p.episodes.append(episode)

    # Write the feed to the configured file.
    p.rss_file(rssfile, minimize=False)
Exemplo n.º 23
0
    def setUp(self):
        """Switch to the C locale and build a fully populated Podcast fixture.

        The values assigned to the Podcast are also stored on the test case,
        the previous locale is kept in ``self.existing_locale`` and
        ``warnings.showwarning`` is replaced by a no-op.
        """
        self.existing_locale = locale.setlocale(locale.LC_ALL, None)
        locale.setlocale(locale.LC_ALL, 'C')

        # XML namespaces and the feed location.
        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        self.name = 'Some Testfeed'

        # Use a character outside ASCII to catch encoding errors.
        self.author = Person('Jon Døll', '*****@*****.**')

        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'

        self.language = 'en'

        # Components of the cloud tuple.
        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = {
            'name': "Contributor Name",
            'email': 'Contributor email'
        }
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = {'Tuesday'}
        self.skip_hours = {23}

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        fg = Podcast()

        def copy_to_feed(*attributes):
            # Assign the fixture values of the same name to the podcast.
            for attribute in attributes:
                setattr(fg, attribute, getattr(self, attribute))

        copy_to_feed('name', 'website', 'description', 'subtitle', 'language')
        fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                    self.cloudRegisterProcedure, self.cloudProtocol)
        copy_to_feed('pubsubhubbub', 'copyright')
        fg.authors.append(self.author)
        copy_to_feed('skip_days', 'skip_hours', 'web_master', 'feed_url',
                     'explicit', 'image', 'owner', 'complete',
                     'new_feed_url', 'xslt')

        self.fg = fg

        # Let every warning through the filter, but display none of them.
        warnings.simplefilter("always")
        warnings.showwarning = lambda *args, **kwargs: None
Exemplo n.º 24
0
class Ximalaya:
    """Turn one Ximalaya album into a podcast RSS file.

    Album metadata, the paginated track list and the per-track details are
    fetched with ``tools.get_url`` and assembled into a ``Podcast``.
    """

    def __init__(self, album_id):
        """Store the album id and prepare request headers and URL templates.

        :param album_id: id substituted into the album URL templates.
        """
        self.headers = tools.get_headers()
        self.podcast = None
        self.album_id = album_id
        # Page size requested from the track-list endpoint.
        self.episode_pre_page = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.episode_detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"

    def get_podcast(self):
        """Fetch the album, add every track as an episode and write the feed.

        The feed is written to ``./podcast/ximalaya/<album_id>.xml``; the
        directory is created when it does not exist yet.

        :raises RuntimeError: if the album info response reports a ``ret``
            value other than 200. Nothing is written in that case.
        """
        webpage = tools.get_url(self.album_info_url.format(self.album_id), self.headers)
        album_info = json.loads(webpage.decode('utf-8'))
        if album_info['ret'] != 200:
            # Without album metadata there is no podcast to serialise; fail
            # with a clear message instead of calling rss_file() on None.
            raise RuntimeError(
                "album info request for {} failed: ret={}".format(self.album_id, album_info['ret']))

        album_info_data = album_info['data']
        main_info = album_info_data['mainInfo']

        self.podcast = Podcast()
        self.podcast.name = main_info['albumTitle']
        self.podcast.website = self.album_url.format(self.album_id)
        # The feed cannot be generated without a description, so fall back to
        # the album title when the album has no intro text.
        self.podcast.description = main_info['richIntro'] or self.podcast.name
        self.podcast.language = 'cn'
        # Keep only the part of the cover URL before the first '!'.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.generator = 'kanemori.getpodcast'
        self.podcast.explicit = False
        self.podcast.withhold_from_itunes = True

        page_num = 1
        # NOTE(review): one page more than the computed page count is
        # requested, exactly as before -- confirm whether the extra request
        # is still needed.
        album_page_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] / self.episode_pre_page) + 1
        while page_num <= album_page_count:
            webpage = tools.get_url(
                self.album_list_url.format(self.album_id, page_num, self.episode_pre_page),
                self.headers)
            album_list = json.loads(webpage.decode('utf-8'))
            for episode_info in album_list['data']['tracksAudioPlay']:
                self.get_episode(episode_info['trackId'])
            page_num += 1

        path = './podcast/ximalaya'
        if not os.path.exists(path):
            os.makedirs(path)

        self.podcast.rss_file(os.path.join(path, '{}.xml'.format(self.album_id)), minimize=True)
        print("「{}」が上手に焼きました".format(self.album_id))

    def get_episode(self, episode_id):
        """Fetch one track's details and append it to ``self.podcast``.

        Errors raised while fetching or parsing propagate to the caller; the
        method performs a single attempt.

        :param episode_id: track id substituted into the detail URL.
        :return: tuple ``(True, link)`` where ``link`` is the track's
            ``playUrl32`` value followed by a newline.
        """
        webpage = tools.get_url(self.episode_detail_url.format(episode_id), self.headers)
        detail = json.loads(webpage.decode('utf-8'))

        episode = self.podcast.add_episode()
        episode.id = 'ximalaya_' + str(episode_id)
        episode.title = detail['title']
        if 'intro' in detail:
            # Line breaks are stored as the literal two-character sequences
            # backslash-r / backslash-n rather than as control characters.
            episode.summary = detail['intro'].replace('\r', '\\r').replace('\n', '\\n')
        episode.publication_date = tools.publication_time(detail['createdAt'])
        # The duration value is passed on as milliseconds.
        episode.media = Media(detail['playUrl32'], duration=timedelta(milliseconds=detail['duration']))
        episode.position = 1

        return True, detail['playUrl32'] + '\n'
Exemplo n.º 25
0
def genero_feed(puntateList):
    """Build the "Il Ruggito del Coniglio" feed and write it to ``rssfile``.

    Nothing is done when ``puntateList`` is empty or None. Every entry is
    read by index: [0] title, [1] link, [2] a "day/month/year" string (may
    be empty), [3] and [4] the two positional arguments given to ``Media``
    (presumably URL and approximate size -- confirm against the caller).
    """
    if not puntateList:
        return

    # Create the podcast and fill in its fixed metadata
    feed = Podcast()
    feed.name = "Il Ruggito del Coniglio"
    feed.description = "Il Ruggito del Coniglio, il programma cult di Radio 2 condotto da Marco Presta e Antonello Dose, racconta l'attualita con folgorante ironia."
    feed.website = "http://www.raiplayradio.it/programmi/ilruggitodelconiglio/"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/ruggitodelconiglio_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/ruggitodelconiglio.xml"
    feed.copyright = "Rai Radio 2"
    feed.language = "it-IT"

    for entry in puntateList:
        item = Episode()
        item.title = entry[0].encode("ascii", "ignore")
        item.link = entry[1]

        # The file size is approximate
        item.media = Media(entry[3], entry[4])

        date_text = entry[2]
        if date_text:
            # "day/month/year" -> fixed 10:00 UTC on that day
            fields = date_text.split("/")
            item.publication_date = datetime.datetime(
                int(fields[2]), int(fields[1]), int(fields[0]),
                10, 0, tzinfo=pytz.utc)
        else:
            # No date available: stamp the episode with the current UTC time
            item.publication_date = pytz.utc.localize(datetime.datetime.utcnow())

        feed.episodes.append(item)

    # Write the feed to the file named by the module-level ``rssfile``
    feed.rss_file(rssfile, minimize=False)
Exemplo n.º 26
0
def generate_rss_from_articles(feed_settings, articles):
    """
    Create a ``Podcast`` from feed settings and a collection of articles.

    :param feed_settings: object providing ``title``, ``author`` (a mapping
        with ``name`` and ``email``), ``source_page_url``, ``copyright``,
        ``subtitle``, ``output_url`` and ``img_url``.
    :param articles: iterable of objects providing ``link``, ``title``,
        ``description``, ``pub_date``, ``media`` and ``type``. ``pub_date``
        is compared with a timezone-aware datetime, so it has to be
        timezone-aware as well.
    :return: the populated ``Podcast``. Its ``last_updated`` and
        ``publication_date`` are the newest article date, or
        2000-01-01 00:00 in Asia/Ho_Chi_Minh when there are no articles.
    """
    # Initialize the feed
    podcast = Podcast()
    podcast.name = feed_settings.title
    author = Person(feed_settings.author['name'], feed_settings.author['email'])
    podcast.authors.append(author)
    podcast.website = feed_settings.source_page_url
    podcast.copyright = feed_settings.copyright
    # The subtitle doubles as description and summary.
    podcast.description = feed_settings.subtitle
    podcast.summary = feed_settings.subtitle
    podcast.subtitle = feed_settings.subtitle
    podcast.language = 'vi'
    podcast.feed_url = feed_settings.output_url
    podcast.image = feed_settings.img_url
    podcast.category = Category('Music', 'Music Commentary')
    podcast.explicit = False
    podcast.owner = author

    vt_tz = pytz.timezone('Asia/Ho_Chi_Minh')
    # Sentinel older than any article. localize() attaches the zone to the
    # naive value directly; astimezone() on a naive datetime would interpret
    # it in the machine's local timezone and give a host-dependent result.
    pastdate = vt_tz.localize(datetime.datetime(2000, 1, 1, 0, 0))

    for article in articles:
        episode = podcast.add_episode()
        episode.id = article.link
        episode.title = article.title
        episode.summary = article.description
        episode.link = article.link
        episode.publication_date = article.pub_date
        # Track the newest publication date seen so far.
        pastdate = max(pastdate, article.pub_date)
        episode.media = Media(article.media, size=None, duration=None, type=article.type)

    podcast.last_updated = pastdate
    podcast.publication_date = pastdate

    return podcast
Exemplo n.º 27
0
class Ximalaya():
    """Scrape a Ximalaya album page and its track list into a podcast RSS file."""

    def __init__(self, album_id):
        """Store the album id and build the URLs and request headers.

        :param album_id: id inserted into the album page and track-list URLs.
        """
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Build the podcast for the album and write ``ximalaya/<album_id>.rss``.

        Podcast metadata is read from the album HTML page; episodes come from
        the track-list API plus one detail request per track. A failure while
        building a single episode is printed with its traceback and the
        remaining tracks are still processed.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast from the album page
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        intro = soup.find('div', 'album-intro')
        if intro and intro.get_text():
            self.podcast.description = intro.get_text()
        else:
            # No intro on the page: reuse the album name as description.
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # Keep only the part of the image URL before the first '!'.
        self.podcast.image = soup.find('div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api, headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                detail_url = 'http://www.ximalaya.com/tracks/%s.json' % each['trackId']
                response = requests.get(detail_url, headers=self.header)
                item = json.loads(response.content)

                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                # .gif and .bmp covers are replaced by the podcast image.
                if image[-4:] in ('.gif', '.bmp'):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                if item['intro']:
                    episode.summary = item['intro'].replace('\r\n', '')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(item['time_until_now'], item['formatted_created_at'])
                episode.media = Media(each['src'], each['duration'])
                # Position counts down as the track index goes up.
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Best effort: report the failing track and carry on.
                print('异常:', e)
                print('异常 URL:', 'http://www.ximalaya.com%s' % each['trackUrl'])
                traceback.print_exc()

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(time_until_now, created_at):
        """Rebuild a full publication datetime from the site's partial dates.

        :param time_until_now: relative age text such as "3年前"; it is only
            used to work out the year.
        :param created_at: month/day/time text such as "12月11日 17:00".
        :return: datetime with the derived year, the month, day, hour and
            minute parsed from ``created_at``, and ``pytz.utc`` as tzinfo.
        """
        date = datetime.strptime(created_at, "%m月%d日 %H:%M")
        # Default to the current year when no recognised age marker is present.
        reduction_year = datetime.now().year
        if '年前' in time_until_now:
            year = int(time_until_now.split('年前')[0])
            reduction = (datetime.now(tzlocal()) - relativedelta(years=year))
            # When the humanised age of "now minus N years" does not read as
            # exactly "N years", step back one more year.
            if humanize_time(reduction) != ('%s years' % year):
                reduction_year = (datetime.now(tzlocal()) - relativedelta(years=year + 1)).year
            else:
                reduction_year = reduction.year
        elif '月前' in time_until_now:
            month = int(time_until_now.split('月前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(months=month)).year
        elif '天前' in time_until_now:
            day = int(time_until_now.split('天前')[0])
            reduction_year = (datetime.now(tzlocal()) - relativedelta(days=day)).year

        # The fifth positional argument of datetime() is the minute, so it
        # must be fed from the parsed minute (the format has no seconds).
        return datetime(reduction_year, date.month, date.day, date.hour, date.minute, tzinfo=pytz.utc)
Exemplo n.º 28
0
def genero_feed(puntateList):
    """Build the "Pascal Rai Radio 2" feed and write it to ``rssfile``.

    An empty or None ``puntateList`` is a no-op. Every entry is read by
    index: [0] title, [1] link, [2] a "day/month/year" string (may be
    empty), [3] and [4] the two positional arguments given to ``Media``
    (presumably URL and approximate size -- confirm against the caller).
    """
    if not puntateList:
        return

    # Create a new podcast with its fixed metadata
    podcast = Podcast()
    podcast.name = "Pascal Rai Radio 2"
    podcast.description = "Pascal un programma di Matteo Caccia in onda su Radio2 che racconta storie di vita. Episodi grandi o piccoli, stravolgenti o minuti, momenti che hanno modificato per sempre la nostra vita o che, anche se di poco, l'hanno indirizzata. Storie che sono il termometro della temperatura di ognuno di noi e che in parte raccontano chi siamo. "
    podcast.website = "http://www.raiplayradio.it/programmi/pascal/"
    podcast.explicit = True
    podcast.image = "https://rss.draghetti.it/pascal_image.jpg"
    podcast.feed_url = "https://rss.draghetti.it/pascal.xml"
    podcast.copyright = "Rai Radio 2"
    podcast.language = "it-IT"

    for row in puntateList:
        entry = Episode()
        entry.title = row[0].encode("ascii", "ignore")
        entry.link = row[1]

        # The file size is approximate
        entry.media = Media(row[3], row[4])

        raw_date = row[2]
        if not raw_date:
            # No date supplied: use the current UTC time
            entry.publication_date = pytz.utc.localize(
                datetime.datetime.utcnow())
        else:
            # "day/month/year" -> fixed 20:00 UTC on that day
            pieces = raw_date.split("/")
            entry.publication_date = datetime.datetime(
                int(pieces[2]),
                int(pieces[1]),
                int(pieces[0]),
                20,
                0,
                tzinfo=pytz.utc)

        podcast.episodes.append(entry)

    # Write the feed to the file named by the module-level ``rssfile``
    podcast.rss_file(rssfile, minimize=False)
Exemplo n.º 29
0
class Ximalaya():
    """Build a podcast RSS file for one Ximalaya album via its JSON endpoints."""

    def __init__(self, album_id):
        """Store the album id and prepare URL templates, session and headers.

        :param album_id: id substituted into the album URL templates.
        """
        self.podcast = None
        self.album_id = album_id
        # Page size requested from the track-list endpoint.
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.time_api = 'https://www.ximalaya.com/revision/time'
        self.s = requests.session()
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            # The header value is the bare user-agent string; the header name
            # must not be repeated inside it.
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/json;charset=UTF-8',
            'Referer': self.album_url.format(self.album_id),
            'Accept-Encoding': "gzip, deflate",
            'Connection': "keep-alive",
            'cache-control': "no-cache",
        }

    def album(self):
        """Fetch the album and write it to ``ximalaya/<album_id>.rss``.

        The request headers are signed first (see ``get_sign``). When the
        album info response's ``ret`` is not 200, nothing is built or
        written. A failure while building a single episode is printed with
        its traceback and the remaining tracks are still processed.
        """
        self.get_sign()
        album_info = self.s.get(self.album_info_url.format(self.album_id),
                                headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            return

        album_info_data = album_info_content['data']

        # Initialise the podcast from the album metadata
        self.podcast = Podcast()
        main_info = album_info_data['mainInfo']
        self.podcast.name = main_info['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        # Fall back to the album name when there is no intro text.
        self.podcast.description = main_info['richIntro'] or self.podcast.name
        self.podcast.language = 'cn'
        # Keep only the part of the cover URL before the first '!'.
        self.podcast.image = 'https:' + main_info['cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        page_num = 1
        # One page more than the computed count is requested (the original
        # comment attributes the +1 to Python 2).
        track_total_count = math.ceil(
            album_info_data['tracksInfo']['trackTotalCount'] /
            self.page_size) + 1
        while page_num <= track_total_count:
            # The Host header is switched to match the server being queried.
            self.header["Host"] = "www.ximalaya.com"
            album_list = self.s.get(
                self.album_list_url.format(self.album_id, page_num,
                                           self.page_size),
                headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                try:
                    self.header["Host"] = "mobile.ximalaya.com"
                    detail = requests.get(
                        self.detail_url.format(each['trackId']),
                        headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))
                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])
                    image = each['trackCoverPath'].split('!')[0]
                    # Only .png/.jpg covers are used; anything else falls
                    # back to the podcast image.
                    if image[-4:] in ('.png', '.jpg'):
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image
                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace(
                            '\r\n', '')
                    else:
                        episode.summary = each['trackName']
                    episode.link = 'http://www.ximalaya.com%s' % each[
                        'albumUrl']
                    episode.authors = [
                        Person("forecho", '*****@*****.**')
                    ]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    episode.media = Media(each['src'], each['duration'])
                    # NOTE(review): ``count`` is the size of the current page,
                    # not of the whole album -- confirm this is the intended
                    # basis for the position.
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and carry on.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            page_num = page_num + 1

        # Write the feed file
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                              minimize=True)

    def get_time(self):
        """
        Fetch the body of the time endpoint (the server timestamp, per the
        original comment).

        :return: response text of ``self.time_api``
        """
        r = self.s.get(self.time_api, headers=self.header)
        return r.text

    def get_sign(self):
        """
        Compute the request signature and store it in the ``xm-sign`` header.

        The value is the concatenation of: the MD5 hex digest of
        ``"himalaya-<server time>"``, ``(<random 0-100>)``, the server time,
        ``(<random 0-100>)`` and the local time in milliseconds.

        :return: None; ``self.header["xm-sign"]`` is updated in place
        """
        now_time = str(round(time.time() * 1000))
        server_time = self.get_time()
        digest = hashlib.md5(
            "himalaya-{}".format(server_time).encode()).hexdigest()
        first_salt = "({})".format(str(round(random.random() * 100)))
        second_salt = "({})".format(str(round(random.random() * 100)))
        self.header["xm-sign"] = (
            str(digest) + first_salt + server_time + second_salt + now_time)

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond epoch timestamp to a UTC datetime.

        :param time: epoch timestamp in milliseconds.
        :return: timezone-aware datetime (tzinfo ``pytz.utc``) truncated to
            the minute.
        """
        # Convert in UTC directly. A tz-less fromtimestamp() yields local
        # wall-clock time, which must not be labelled as UTC.
        moment = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return moment.replace(second=0, microsecond=0)
Exemplo n.º 30
0
    def setUp(self):
        """Prepare reference values on the test case and a matching Podcast.

        Every reference value is stored as an attribute of the test case and
        the fully populated ``Podcast`` ends up in ``self.fg``. Warnings are
        set to always trigger and ``warnings.showwarning`` is replaced with a
        function that does nothing.
        """
        fg = Podcast()

        # XML namespaces and the feed's own URL
        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        # Basic feed metadata
        self.name = 'Some Testfeed'
        self.author = Person('John Doe', '*****@*****.**')
        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'
        self.language = 'en'

        # Components of the cloud tuple
        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = {
            'name': "Contributor Name",
            'email': 'Contributor email',
        }
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = {'Tuesday'}
        self.skip_hours = {23}

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        # Copy the reference values onto the podcast, in a fixed order.
        for attr in ('name', 'website', 'description', 'subtitle',
                     'language'):
            setattr(fg, attr, getattr(self, attr))
        fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                    self.cloudRegisterProcedure, self.cloudProtocol)
        for attr in ('pubsubhubbub', 'copyright'):
            setattr(fg, attr, getattr(self, attr))
        fg.authors.append(self.author)
        for attr in ('skip_days', 'skip_hours', 'web_master', 'feed_url',
                     'explicit', 'image', 'owner', 'complete',
                     'new_feed_url', 'xslt'):
            setattr(fg, attr, getattr(self, attr))

        self.fg = fg

        # Always trigger warnings, but discard them instead of showing them.
        warnings.simplefilter("always")
        warnings.showwarning = lambda *args, **kwargs: None
Exemplo n.º 31
0
    def album(self):
        """Download the album and write it to ``ximalaya/<album_id>.rss``.

        Nothing is built or written when the album info response's ``ret``
        is not 200. A failure while building a single episode is printed
        with its traceback and the remaining tracks are still processed.
        """
        info_url = self.album_info_url.format(self.album_id)
        raw_info = requests.get(info_url, headers=self.header).content
        info = json.loads(raw_info.decode('utf-8'))
        if info['ret'] != 200:
            return

        data = info['data']

        # Initialise the podcast from the album metadata
        podcast = self.podcast = Podcast()
        main_info = data['mainInfo']
        podcast.name = main_info['albumTitle']
        podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        podcast.website = self.album_url.format(self.album_id)
        podcast.copyright = 'cc-by'
        # Use the album name when there is no intro text.
        podcast.description = main_info['richIntro'] or podcast.name
        podcast.language = 'cn'
        # Keep only the part of the cover URL before the first '!'.
        podcast.image = 'https:' + main_info['cover'].split('!')[0]
        podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        podcast.category = Category('Technology', 'Podcasting')
        podcast.explicit = False
        podcast.complete = False
        podcast.owner = Person("forecho", '*****@*****.**')

        # One page more than the computed count is requested (the original
        # comment attributes the +1 to Python 2).
        last_page = math.ceil(
            data['tracksInfo']['trackTotalCount'] / self.page_size) + 1
        page_num = 1
        while page_num <= last_page:
            list_url = self.album_list_url.format(
                self.album_id, page_num, self.page_size)
            raw_list = requests.get(list_url, headers=self.header).content
            listing = json.loads(raw_list.decode('utf-8'))
            tracks = listing['data']['tracksAudioPlay']
            count = len(tracks)
            for each in tracks:
                try:
                    detail_url = self.detail_url.format(each['trackId'])
                    raw_detail = requests.get(
                        detail_url, headers=self.header).content
                    detail_content = json.loads(raw_detail.decode('utf-8'))
                    episode = podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])
                    cover = each['trackCoverPath'].split('!')[0]
                    # Only .png/.jpg covers are used; anything else falls
                    # back to the podcast image.
                    episode.image = ('https:' + cover
                                     if cover[-4:] in ('.png', '.jpg')
                                     else podcast.image)
                    if 'intro' in detail_content:
                        intro = detail_content['intro']
                        episode.summary = intro.replace('\r\n', '')
                    else:
                        episode.summary = each['trackName']
                    episode.link = 'http://www.ximalaya.com%s' % each['albumUrl']
                    episode.authors = [Person("forecho", '*****@*****.**')]
                    created_at = detail_content['createdAt']
                    episode.publication_date = self.reduction_time(created_at)
                    episode.media = Media(each['src'], each['duration'])
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Best effort: report the failing track and carry on.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()
            page_num += 1

        # Write the feed file
        podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)
Exemplo n.º 32
0
class TestPodcast(unittest.TestCase):
    def setUp(self):
        """Create the fully populated ``Podcast`` fixture used by the tests.

        Every value assigned to the podcast is also kept on ``self`` so the
        tests can compare the podcast and its generated feed against it.

        The process locale is switched to ``"C"`` (``tearDown`` switches it
        back), and the ``warnings`` configuration changed at the end of this
        method is restored by a cleanup registered with ``addCleanup``.
        """
        # Remember the active locale before replacing it for the test
        self.existing_locale = locale.setlocale(locale.LC_ALL, None)
        locale.setlocale(locale.LC_ALL, 'C')

        fg = Podcast()

        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        self.name = 'Some Testfeed'

        # Use character not in ASCII to catch encoding errors
        self.author = Person('Jon Døll', '*****@*****.**')

        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'

        self.language = 'en'

        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = {
            'name': "Contributor Name",
            'email': 'Contributor email'
        }
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = {'Tuesday'}
        self.skip_hours = {23}

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        # Copy the expected values onto the podcast under test
        fg.name = self.name
        fg.website = self.website
        fg.description = self.description
        fg.subtitle = self.subtitle
        fg.language = self.language
        fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                    self.cloudRegisterProcedure, self.cloudProtocol)
        fg.pubsubhubbub = self.pubsubhubbub
        fg.copyright = self.copyright
        fg.authors.append(self.author)
        fg.skip_days = self.skip_days
        fg.skip_hours = self.skip_hours
        fg.web_master = self.web_master
        fg.feed_url = self.feed_url
        fg.explicit = self.explicit
        fg.image = self.image
        fg.owner = self.owner
        fg.complete = self.complete
        fg.new_feed_url = self.new_feed_url
        fg.xslt = self.xslt

        self.fg = fg

        # Snapshot the warnings filters and the showwarning hook, and
        # register their restoration, so the changes below end with the
        # test instead of leaking into everything that runs afterwards.
        warnings_state = warnings.catch_warnings()
        warnings_state.__enter__()
        self.addCleanup(warnings_state.__exit__, None, None, None)

        # Never filter a warning out, but do not print any of them either
        warnings.simplefilter("always")

        def noop(*args, **kwargs):
            pass

        warnings.showwarning = noop

    def tearDown(self):
        locale.setlocale(locale.LC_ALL, self.existing_locale)

    def test_constructor(self):
        # Build the podcast purely through constructor keywords, then reuse
        # the attribute checks of test_baseFeed on the result.
        cloud_settings = (
            self.cloudDomain,
            self.cloudPort,
            self.cloudPath,
            self.cloudRegisterProcedure,
            self.cloudProtocol,
        )
        constructor_arguments = {
            "name": self.name,
            "website": self.website,
            "description": self.description,
            "subtitle": self.subtitle,
            "language": self.language,
            "cloud": cloud_settings,
            "pubsubhubbub": self.pubsubhubbub,
            "copyright": self.copyright,
            "authors": [self.author],
            "skip_days": self.skip_days,
            "skip_hours": self.skip_hours,
            "web_master": self.web_master,
            "feed_url": self.feed_url,
            "explicit": self.explicit,
            "image": self.image,
            "owner": self.owner,
            "complete": self.complete,
            "new_feed_url": self.new_feed_url,
            "xslt": self.xslt,
        }
        # This replaces the podcast that setUp prepared
        self.fg = Podcast(**constructor_arguments)
        self.test_baseFeed()

    def test_constructorUnknownAttributes(self):
        # A misspelled keyword is expected to raise TypeError ...
        with self.assertRaises(TypeError):
            Podcast(naem="Oh, looks like a typo")
        # ... and so is a positional argument
        with self.assertRaises(TypeError):
            Podcast("Haha, No Keyword")

    def test_baseFeed(self):
        fg = self.fg

        assert fg.name == self.name

        assert fg.authors[0] == self.author
        assert fg.web_master == self.web_master

        assert fg.website == self.website

        assert fg.description == self.description
        assert fg.subtitle == self.subtitle

        assert fg.language == self.language
        assert fg.feed_url == self.feed_url
        assert fg.image == self.image
        assert fg.owner == self.owner
        assert fg.complete == self.complete
        assert fg.pubsubhubbub == self.pubsubhubbub
        assert fg.cloud == (self.cloudDomain, self.cloudPort, self.cloudPath,
                            self.cloudRegisterProcedure, self.cloudProtocol)
        assert fg.copyright == self.copyright
        assert fg.new_feed_url == self.new_feed_url
        assert fg.skip_days == self.skip_days
        assert fg.skip_hours == self.skip_hours
        assert fg.xslt == self.xslt

    def test_rssFeedFile(self):
        # Write the feed to a file without XML declaration and run the
        # shared feed checks on what was written.
        contents = self.getRssFeedFileContents(self.fg, xml_declaration=False)
        # The line breaks are removed before the document is checked
        without_newlines = contents.replace('\n', '')
        self.checkRssString(without_newlines)

    def getRssFeedFileContents(self, fg, **kwargs):
        # Keep track of our temporary file and its filename
        filename = None
        file = None
        encoding = 'UTF-8'
        try:
            # Get our temporary file name
            file = tempfile.NamedTemporaryFile(delete=False)
            filename = file.name
            # Close the file; we will just use its name
            file.close()
            # Write the RSS to the file (overwriting it)
            fg.rss_file(filename=filename, encoding=encoding, **kwargs)
            # Read the resulting RSS
            with open(filename, "r", encoding=encoding) as myfile:
                rssString = myfile.read()
        finally:
            # We don't need the file any longer, so delete it
            if filename:
                os.unlink(filename)
            elif file:
                # Ops, we were interrupted between the first and second stmt
                filename = file.name
                file.close()
                os.unlink(filename)
            else:
                # We were interrupted between entering the try-block and
                # getting the temporary file. Not much we can do.
                pass
        return rssString

    def test_rssFeedString(self):
        # Render the feed as a string without XML declaration and run the
        # shared feed checks on it.
        rendered = self.fg.rss_str(xml_declaration=False)
        self.checkRssString(rendered)

    def test_rssStringAndFileAreEqual(self):
        # The feed must read the same whether it is rendered to a string or
        # written to a file and read back.
        podcast = self.fg
        as_string = podcast.rss_str()
        as_file = self.getRssFeedFileContents(podcast)
        self.assertEqual(as_string, as_file)

    def checkRssString(self, rssString):
        feed = etree.fromstring(rssString)
        nsRss = self.nsContent
        nsAtom = "http://www.w3.org/2005/Atom"

        channel = feed.find("channel")
        assert channel != None

        assert channel.find("title").text == self.name
        assert channel.find("description").text == self.description
        assert channel.find("{%s}subtitle" % self.nsItunes).text == \
            self.subtitle
        assert channel.find("link").text == self.website
        assert channel.find("lastBuildDate").text != None
        assert channel.find("language").text == self.language
        assert channel.find(
            "docs").text == "http://www.rssboard.org/rss-specification"
        assert self.programname in channel.find("generator").text
        assert channel.find("cloud").get('domain') == self.cloudDomain
        assert channel.find("cloud").get('port') == self.cloudPort
        assert channel.find("cloud").get('path') == self.cloudPath
        assert channel.find("cloud").get(
            'registerProcedure') == self.cloudRegisterProcedure
        assert channel.find("cloud").get('protocol') == self.cloudProtocol
        assert channel.find("copyright").text == self.copyright
        assert channel.find("docs").text == self.docs
        assert self.author.email in channel.find("managingEditor").text
        assert channel.find("skipDays").find("day").text in self.skip_days
        assert int(
            channel.find("skipHours").find("hour").text) in self.skip_hours
        assert self.web_master.email in channel.find("webMaster").text

        links = channel.findall("{%s}link" % nsAtom)
        selflinks = [link for link in links if link.get('rel') == 'self']
        hublinks = [link for link in links if link.get('rel') == 'hub']

        assert selflinks, "No <atom:link rel='self'> element found"
        selflink = selflinks[0]
        assert selflink.get('href') == self.feed_url
        assert selflink.get('type') == 'application/rss+xml'

        assert hublinks, "No <atom:link rel='hub'> element found"
        hublink = hublinks[0]
        assert hublink.get('href') == self.pubsubhubbub
        assert hublink.get('type') is None

        assert channel.find("{%s}image" % self.nsItunes).get('href') == \
            self.image
        owner = channel.find("{%s}owner" % self.nsItunes)
        assert owner.find("{%s}name" % self.nsItunes).text == self.owner.name
        assert owner.find("{%s}email" % self.nsItunes).text == self.owner.email
        assert channel.find("{%s}complete" % self.nsItunes).text.lower() == \
            "yes"
        assert channel.find("{%s}new-feed-url" % self.nsItunes).text == \
            self.new_feed_url

    def test_feedUrlValidation(self):
        # Assigning a feed URL that lacks a scheme is expected to raise
        # ValueError
        with self.assertRaises(ValueError):
            self.fg.feed_url = "example.com/feed.rss"

    def test_generator(self):
        # Exercise the different ways of choosing the <generator> text
        name = "My Awesome Software"
        version = (1, 0)
        url = "http://example.com/awesomesoft/"

        def generator_text():
            # Build the feed afresh and return the <generator> element text
            channel = self.fg._create_rss().find("channel")
            return channel.find("generator").text

        # set_generator with just a name: the program name is mentioned too
        self.fg.set_generator(name)
        text = generator_text()
        assert name in text
        assert self.programname in text

        # set_generator with exclude_podgen: the program name is left out
        self.fg.set_generator(name, exclude_podgen=True)
        text = generator_text()
        assert name in text
        assert self.programname not in text

        # set_generator with name, version and url: all of them show up
        self.fg.set_generator(name, version, url)
        text = generator_text()
        assert name in text
        for version_part in version:
            assert str(version_part) in text
        assert url in text

        # Assigning the generator attribute directly: no program name
        self.fg.generator = name
        text = generator_text()
        assert name in text
        assert self.programname not in text

    def test_str(self):
        # str() of the podcast must equal the non-minimized UTF-8 feed
        # string that includes the XML declaration.
        as_text = str(self.fg)
        rss_options = {
            "minimize": False,
            "encoding": "UTF-8",
            "xml_declaration": True,
        }
        assert as_text == self.fg.rss_str(**rss_options)

    def test_updated(self):
        # lastBuildDate: present by default, follows last_updated when that
        # is set to a date, and disappears when last_updated is False.
        custom_date = datetime.datetime(
            2016, 1, 1, 0, 10, tzinfo=dateutil.tz.tzutc())

        def last_build_date():
            # Build the feed afresh and look up its <lastBuildDate> element
            channel = self.fg._create_rss().find("channel")
            return channel.find("lastBuildDate")

        # Nothing configured yet: the element is there anyway
        assert last_build_date() is not None

        # A custom date ends up in the element
        self.fg.last_updated = custom_date
        element = last_build_date()
        assert element is not None
        assert dateutil.parser.parse(element.text) == custom_date

        # False switches the element off
        self.fg.last_updated = False
        element = last_build_date()
        assert element is None

    def test_AuthorEmail(self):
        # An author with only an email address goes into managingEditor,
        # and neither dc:creator nor itunes:author is written. This is per
        # the RSS best practices, see the section about dc:creator.
        email_only = Person(None, "*****@*****.**")
        self.fg.authors = [email_only]
        channel = self.fg._create_rss().find("channel")

        managing_editor = channel.find("managingEditor")
        assert managing_editor.text == self.fg.authors[0].email

        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator is None

        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author is None

    def test_AuthorName(self):
        # An author with only a name goes into dc:creator and
        # itunes:author, and no managingEditor is written.
        name_only = Person("Just a. Name")
        self.fg.authors = [name_only]
        channel = self.fg._create_rss().find("channel")

        managing_editor = channel.find("managingEditor")
        assert managing_editor is None

        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator.text == self.fg.authors[0].name

        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author.text == self.fg.authors[0].name

    def test_AuthorNameAndEmail(self):
        # An author with name and email goes into managingEditor and
        # itunes:author, and no dc:creator is written.
        complete_author = Person("Both a name", "*****@*****.**")
        self.fg.authors = [complete_author]
        channel = self.fg._create_rss().find("channel")

        # managingEditor is expected to read "email (name)"
        author = self.fg.authors[0]
        expected_editor = author.email + " (" + author.name + ")"
        self.assertEqual(expected_editor, channel.find("managingEditor").text)

        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator is None

        # itunes:author carries the name alone
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author.text == self.fg.authors[0].name

    def test_multipleAuthors(self):
        # With several authors, itunes:author and dc:creator are used and
        # managingEditor is not.
        person1 = Person("Multiple", "*****@*****.**")
        person2 = Person("Are", "*****@*****.**")
        people = (person1, person2)
        self.fg.authors = [person1, person2]
        channel = self.fg._create_rss().find("channel")

        # One dc:creator per author, each holding that author's name and
        # email
        creator_texts = [
            element.text
            for element in channel.findall("{%s}creator" % self.nsDc)
        ]
        assert len(creator_texts) == 2
        for person, creator_text in zip(people, creator_texts):
            assert person.name in creator_text
            assert person.email in creator_text

        # A single itunes:author holding the names but no email address
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author is not None
        itunes_author_text = itunes_author.text
        for person in people:
            assert person.name in itunes_author_text
            assert person.email not in itunes_author_text

        # managingEditor stays unused
        assert channel.find("managingEditor") is None

    def test_authorsInvalidValue(self):
        # The assignment made by do_authorsInvalidValue is expected to
        # raise TypeError
        with self.assertRaises(TypeError):
            self.do_authorsInvalidValue()

    def do_authorsInvalidValue(self):
        """Assign a single ``Person``, not a list of them, to ``authors``."""
        lone_person = Person("Opsie", "*****@*****.**")
        self.fg.authors = lone_person

    def test_webMaster(self):
        # Check how the web master ends up in the <webMaster> element

        def web_master_text():
            # Build the feed afresh and return the <webMaster> element text
            channel = self.fg._create_rss().find("channel")
            return channel.find("webMaster").text

        # Email only: the element holds just the address
        self.fg.web_master = Person(None, "*****@*****.**")
        assert web_master_text() == self.fg.web_master.email

        # A web master without email address is expected to be refused
        without_email = Person("Mr. No Email Address")
        with self.assertRaises(ValueError):
            self.fg.web_master = without_email

        # Name and email: the element is expected to read "email (name)"
        self.fg.web_master = Person("Both a name", "*****@*****.**")
        actual = web_master_text()
        web_master = self.fg.web_master
        self.assertEqual(
            web_master.email + " (" + web_master.name + ")", actual)

    def test_categoryWithoutSubcategory(self):
        # A category without subcategory yields one itunes:category
        # element that has no nested itunes:category.
        category_tag = "{%s}category" % self.nsItunes
        arts = Category("Arts")
        self.fg.category = arts

        channel = self.fg._create_rss().find("channel")
        itunes_category = channel.find(category_tag)
        assert itunes_category is not None

        self.assertEqual(itunes_category.get("text"), arts.category)

        nested_category = itunes_category.find(category_tag)
        assert nested_category is None

    def test_categoryWithSubcategory(self):
        # A category with subcategory yields an itunes:category element
        # with a nested itunes:category naming the subcategory.
        category_tag = "{%s}category" % self.nsItunes
        arts_food = Category("Arts", "Food")
        self.fg.category = arts_food

        channel = self.fg._create_rss().find("channel")
        itunes_category = channel.find(category_tag)
        assert itunes_category is not None

        itunes_subcategory = itunes_category.find(category_tag)
        assert itunes_subcategory is not None
        self.assertEqual(
            itunes_subcategory.get("text"), arts_food.subcategory)

    def test_categoryChecks(self):
        # Assigning a plain tuple as the category is expected to raise
        # TypeError
        not_a_category = ("Arts", "Food")
        with self.assertRaises(TypeError):
            self.fg.category = not_a_category

    def test_explicitIsExplicit(self):
        # explicit=True must produce an itunes:explicit element holding one
        # of the accepted "explicit" spellings.
        accepted = ("yes", "explicit", "true")
        self.fg.explicit = True

        channel = self.fg._create_rss().find("channel")
        itunes_explicit = channel.find("{%s}explicit" % self.nsItunes)
        assert itunes_explicit is not None

        found = itunes_explicit.text.lower()
        assert found in accepted, \
            "itunes:explicit was %s, expected yes, explicit or true" \
            % itunes_explicit.text

    def test_explicitIsClean(self):
        # explicit=False must produce an itunes:explicit element holding
        # one of the accepted "clean" spellings.
        accepted = ("no", "clean", "false")
        self.fg.explicit = False

        channel = self.fg._create_rss().find("channel")
        itunes_explicit = channel.find("{%s}explicit" % self.nsItunes)
        assert itunes_explicit is not None

        found = itunes_explicit.text.lower()
        assert found in accepted, \
            "itunes:explicit was %s, expected no, clean or false" \
            % itunes_explicit.text

    def test_mandatoryValues(self):
        # Create one Podcast per mandatory property, each time leaving
        # exactly that property unset. Building the feed is then expected
        # to raise ValueError every time.

        # (property name, Podcast attribute, value) per mandatory property
        mandatory = (
            ("description", "description", self.description),
            ("title", "name", self.name),
            ("link", "website", self.website),
            ("explicit", "explicit", self.explicit),
        )

        for omitted, _, _ in mandatory:
            fg = Podcast()
            for property_name, attribute, value in mandatory:
                if property_name != omitted:
                    setattr(fg, attribute, value)
            try:
                with self.assertRaises(ValueError):
                    fg._create_rss()
            except AssertionError as e:
                # Say which property the failure belongs to
                failure = AssertionError("The test failed for %s" % omitted)
                raise_from(failure, e)

    def test_withholdFromItunesOffByDefault(self):
        assert not self.fg.withhold_from_itunes

    def test_withholdFromItunes(self):
        # itunes:block is written as "yes" while withhold_from_itunes is
        # True and is absent while it is False.

        def itunes_block():
            # Build the feed afresh and look up its itunes:block element
            channel = self.fg._create_rss().find("channel")
            return channel.find("{%s}block" % self.nsItunes)

        self.fg.withhold_from_itunes = True
        block_element = itunes_block()
        assert block_element is not None
        self.assertEqual(block_element.text.lower(), "yes")

        self.fg.withhold_from_itunes = False
        block_element = itunes_block()
        assert block_element is None

    def test_modifyingSkipDaysAfterwards(self):
        # A bad day added to skip_days in place is expected to make
        # rss_str raise ValueError
        bad_day = "Unrecognized day"
        self.fg.skip_days.add(bad_day)
        with self.assertRaises(ValueError):
            self.fg.rss_str()
        # Once the bad value is gone, the feed can be generated again
        self.fg.skip_days.remove(bad_day)
        self.fg.rss_str()

    def test_modifyingSkipHoursAfterwards(self):
        # A bad hour added to skip_hours in place is expected to make
        # rss_str raise ValueError
        bad_hour = 26
        self.fg.skip_hours.add(bad_hour)
        with self.assertRaises(ValueError):
            self.fg.rss_str()
        # Once the bad value is gone, the feed can be generated again
        self.fg.skip_hours.remove(bad_hour)
        self.fg.rss_str()

    # Tests for xslt
    def test_xslt_str(self):
        # Run the xslt checks against feeds rendered with rss_str
        render_as_string = lambda **kwargs: self.fg.rss_str(**kwargs)
        self.help_test_xslt_using(render_as_string)

    def test_xslt_file(self):
        # Run the xslt checks against feeds written to a file and read back
        render_via_file = lambda **kwargs: self.getRssFeedFileContents(
            self.fg, **kwargs)
        self.help_test_xslt_using(render_via_file)

    def help_test_xslt_using(self, generated_feed):
        """Run tests for xslt, generating the feed str using the given function.
        """
        xslt_path = "http://example.com/mystylesheet.xsl"
        xslt_pi = "<?xml-stylesheet"

        # No xslt when set to None
        self.fg.xslt = None
        assert xslt_pi not in generated_feed()
        assert xslt_pi not in generated_feed(minimize=True)
        assert xslt_pi not in generated_feed(xml_declaration=False)

        self.fg.xslt = xslt_path

        # Now we have the stylesheet in there
        assert xslt_pi in generated_feed()
        assert xslt_pi in generated_feed(minimize=True)
        assert xslt_pi in generated_feed(xml_declaration=False)

        assert xslt_path in generated_feed()
        assert xslt_path in generated_feed(minimize=True)
        assert xslt_path in generated_feed(xml_declaration=False)

    def test_imageWarningNoExt(self):
        # An image URL without file extension is stored, but triggers a
        # NotSupportedByItunesWarning.
        no_ext = "http://static.example.com/images/logo"
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            # Nothing has been recorded yet
            self.assertEqual(len(caught), 0)

            self.fg.image = no_ext

            # Exactly one warning, and of the expected category
            self.assertEqual(1, len(caught))
            latest = caught.pop()
            assert issubclass(latest.category, NotSupportedByItunesWarning)
            # The image was set all the same
            self.assertEqual(no_ext, self.fg.image)

    def test_imageWarningBadExt(self):
        # An image URL ending in .gif is stored, but triggers a
        # NotSupportedByItunesWarning.
        bad_ext = "http://static.example.com/images/logo.gif"
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")

            self.fg.image = bad_ext

            # Exactly one warning, and of the expected category
            self.assertEqual(1, len(caught))
            latest = caught.pop()
            assert issubclass(latest.category, NotSupportedByItunesWarning)
            # The image was set all the same
            self.assertEqual(bad_ext, self.fg.image)

    def test_imageNoWarningWithGoodExt(self):
        # Image URLs ending in jpg, png or jpeg are stored without any
        # warning being recorded.
        url_stem = "http://static.example.com/images/logo."
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")

            for extension in ("jpg", "png", "jpeg"):
                good_ext = url_stem + extension
                self.fg.image = good_ext
                # No warning may have been recorded
                failure_text = "Extension %s raised warnings (%s)" % (
                    extension, caught)
                self.assertEqual(0, len(caught), failure_text)
                # The image was set
                self.assertEqual(good_ext, self.fg.image)
Exemplo n.º 33
0
class TestPodcast(unittest.TestCase):

    def setUp(self):
        """Create the fully populated ``Podcast`` fixture used by the tests.

        Every value assigned to the podcast is also kept on ``self`` so the
        tests can compare the podcast and its generated feed against it.

        The ``warnings`` configuration changed at the end of this method is
        restored by a cleanup registered with ``addCleanup``.
        """
        fg = Podcast()

        self.nsContent = "http://purl.org/rss/1.0/modules/content/"
        self.nsDc = "http://purl.org/dc/elements/1.1/"
        self.nsItunes = "http://www.itunes.com/dtds/podcast-1.0.dtd"
        self.feed_url = "http://example.com/feeds/myfeed.rss"

        self.name = 'Some Testfeed'

        self.author = Person('John Doe', '*****@*****.**')

        self.website = 'http://example.com'
        self.description = 'This is a cool feed!'
        self.subtitle = 'Coolest of all'

        self.language = 'en'

        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.pubsubhubbub = "http://pubsubhubbub.example.com/"

        self.contributor = {'name': "Contributor Name",
                            'email': 'Contributor email'}
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.skip_days = {'Tuesday'}
        self.skip_hours = {23}

        self.explicit = False

        self.programname = podgen.version.name

        self.web_master = Person(email='*****@*****.**')
        self.image = "http://example.com/static/podcast.png"
        self.owner = self.author
        self.complete = True
        self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
        self.xslt = "http://example.com/feed/stylesheet.xsl"

        # Copy the expected values onto the podcast under test
        fg.name = self.name
        fg.website = self.website
        fg.description = self.description
        fg.subtitle = self.subtitle
        fg.language = self.language
        fg.cloud = (self.cloudDomain, self.cloudPort, self.cloudPath,
                    self.cloudRegisterProcedure, self.cloudProtocol)
        fg.pubsubhubbub = self.pubsubhubbub
        fg.copyright = self.copyright
        fg.authors.append(self.author)
        fg.skip_days = self.skip_days
        fg.skip_hours = self.skip_hours
        fg.web_master = self.web_master
        fg.feed_url = self.feed_url
        fg.explicit = self.explicit
        fg.image = self.image
        fg.owner = self.owner
        fg.complete = self.complete
        fg.new_feed_url = self.new_feed_url
        fg.xslt = self.xslt

        self.fg = fg

        # Snapshot the warnings filters and the showwarning hook, and
        # register their restoration, so the changes below end with the
        # test instead of leaking into everything that runs afterwards.
        warnings_state = warnings.catch_warnings()
        warnings_state.__enter__()
        self.addCleanup(warnings_state.__exit__, None, None, None)

        # Never filter a warning out, but do not print any of them either
        warnings.simplefilter("always")

        def noop(*args, **kwargs):
            pass

        warnings.showwarning = noop

    def test_constructor(self):
        # Build the podcast purely through constructor keywords, then reuse
        # the attribute checks of test_baseFeed on the result.
        cloud_settings = (
            self.cloudDomain,
            self.cloudPort,
            self.cloudPath,
            self.cloudRegisterProcedure,
            self.cloudProtocol,
        )
        constructor_arguments = dict(
            name=self.name,
            website=self.website,
            description=self.description,
            subtitle=self.subtitle,
            language=self.language,
            cloud=cloud_settings,
            pubsubhubbub=self.pubsubhubbub,
            copyright=self.copyright,
            authors=[self.author],
            skip_days=self.skip_days,
            skip_hours=self.skip_hours,
            web_master=self.web_master,
            feed_url=self.feed_url,
            explicit=self.explicit,
            image=self.image,
            owner=self.owner,
            complete=self.complete,
            new_feed_url=self.new_feed_url,
            xslt=self.xslt,
        )
        # This replaces the podcast that setUp prepared
        self.fg = Podcast(**constructor_arguments)
        self.test_baseFeed()

    def test_constructorUnknownAttributes(self):
        # A misspelled keyword is expected to raise TypeError ...
        with self.assertRaises(TypeError):
            Podcast(naem="Oh, looks like a typo")
        # ... and so is a positional argument
        with self.assertRaises(TypeError):
            Podcast("Haha, No Keyword")

    def test_baseFeed(self):
        fg = self.fg

        assert fg.name == self.name

        assert fg.authors[0] == self.author
        assert fg.web_master == self.web_master

        assert fg.website == self.website

        assert fg.description == self.description
        assert fg.subtitle == self.subtitle

        assert fg.language == self.language
        assert fg.feed_url == self.feed_url
        assert fg.image == self.image
        assert fg.owner == self.owner
        assert fg.complete == self.complete
        assert fg.pubsubhubbub == self.pubsubhubbub
        assert fg.cloud == (self.cloudDomain, self.cloudPort, self.cloudPath,
                            self.cloudRegisterProcedure, self.cloudProtocol)
        assert fg.copyright == self.copyright
        assert fg.new_feed_url == self.new_feed_url
        assert fg.skip_days == self.skip_days
        assert fg.skip_hours == self.skip_hours
        assert fg.xslt == self.xslt

    def test_rssFeedFile(self):
        # Write the feed to a file without XML declaration and run the
        # shared feed checks on what was written.
        contents = self.getRssFeedFileContents(self.fg, xml_declaration=False)
        # The line breaks are removed before the document is checked
        without_newlines = contents.replace('\n', '')
        self.checkRssString(without_newlines)

    def getRssFeedFileContents(self, fg, **kwargs):
        # Keep track of our temporary file and its filename
        filename = None
        file = None
        try:
            # Get our temporary file name
            file = tempfile.NamedTemporaryFile(delete=False)
            filename = file.name
            # Close the file; we will just use its name
            file.close()
            # Write the RSS to the file (overwriting it)
            fg.rss_file(filename=filename, **kwargs)
            # Read the resulting RSS
            with open(filename, "r") as myfile:
                rssString = myfile.read()
        finally:
            # We don't need the file any longer, so delete it
            if filename:
                os.unlink(filename)
            elif file:
                # Ops, we were interrupted between the first and second stmt
                filename = file.name
                file.close()
                os.unlink(filename)
            else:
                # We were interrupted between entering the try-block and
                # getting the temporary file. Not much we can do.
                pass
        return rssString


    def test_rssFeedString(self):
        # Render the feed as a string without XML declaration and run the
        # shared feed checks on it.
        rendered = self.fg.rss_str(xml_declaration=False)
        self.checkRssString(rendered)

    def test_rssStringAndFileAreEqual(self):
        # The feed must read the same whether it is rendered to a string or
        # written to a file and read back.
        podcast = self.fg
        as_string = podcast.rss_str()
        as_file = self.getRssFeedFileContents(podcast)
        self.assertEqual(as_string, as_file)

    def checkRssString(self, rssString):
        feed = etree.fromstring(rssString)
        nsRss = self.nsContent
        nsAtom = "http://www.w3.org/2005/Atom"

        channel = feed.find("channel")
        assert channel != None

        assert channel.find("title").text == self.name
        assert channel.find("description").text == self.description
        assert channel.find("{%s}subtitle" % self.nsItunes).text == \
            self.subtitle
        assert channel.find("link").text == self.website
        assert channel.find("lastBuildDate").text != None
        assert channel.find("language").text == self.language
        assert channel.find("docs").text == "http://www.rssboard.org/rss-specification"
        assert self.programname in channel.find("generator").text
        assert channel.find("cloud").get('domain') == self.cloudDomain
        assert channel.find("cloud").get('port') == self.cloudPort
        assert channel.find("cloud").get('path') == self.cloudPath
        assert channel.find("cloud").get('registerProcedure') == self.cloudRegisterProcedure
        assert channel.find("cloud").get('protocol') == self.cloudProtocol
        assert channel.find("copyright").text == self.copyright
        assert channel.find("docs").text == self.docs
        assert self.author.email in channel.find("managingEditor").text
        assert channel.find("skipDays").find("day").text in self.skip_days
        assert int(channel.find("skipHours").find("hour").text) in self.skip_hours
        assert self.web_master.email in channel.find("webMaster").text

        links = channel.findall("{%s}link" % nsAtom)
        selflinks = [link for link in links if link.get('rel') == 'self']
        hublinks = [link for link in links if link.get('rel') == 'hub']

        assert selflinks, "No <atom:link rel='self'> element found"
        selflink = selflinks[0]
        assert selflink.get('href') == self.feed_url
        assert selflink.get('type') == 'application/rss+xml'

        assert hublinks, "No <atom:link rel='hub'> element found"
        hublink = hublinks[0]
        assert hublink.get('href') == self.pubsubhubbub
        assert hublink.get('type') is None

        assert channel.find("{%s}image" % self.nsItunes).get('href') == \
            self.image
        owner = channel.find("{%s}owner" % self.nsItunes)
        assert owner.find("{%s}name" % self.nsItunes).text == self.owner.name
        assert owner.find("{%s}email" % self.nsItunes).text == self.owner.email
        assert channel.find("{%s}complete" % self.nsItunes).text.lower() == \
            "yes"
        assert channel.find("{%s}new-feed-url" % self.nsItunes).text == \
            self.new_feed_url

    def test_feedUrlValidation(self):
        # Assigning a feed URL without a scheme is expected to be refused
        # with a ValueError.
        schemeless_url = "example.com/feed.rss"
        with self.assertRaises(ValueError):
            self.fg.feed_url = schemeless_url

    def test_generator(self):
        # The <generator> text can be configured through set_generator() or
        # by assigning the generator attribute; check each variant in turn.
        name = "My Awesome Software"
        version = (1, 0)
        url = "http://example.com/awesomesoft/"

        def rendered_generator():
            # Build the feed and return the text of channel/generator.
            channel = self.fg._create_rss().find("channel")
            return channel.find("generator").text

        # set_generator with a name only: self.programname is still present
        self.fg.set_generator(name)
        text = rendered_generator()
        assert name in text
        assert self.programname in text

        # set_generator with exclude_podgen=True: self.programname is gone
        self.fg.set_generator(name, exclude_podgen=True)
        text = rendered_generator()
        assert name in text
        assert self.programname not in text

        # set_generator with name, version and url: all of them show up
        self.fg.set_generator(name, version, url)
        text = rendered_generator()
        assert name in text
        for version_part in version:
            assert str(version_part) in text
        assert url in text

        # Assigning generator directly: self.programname is not added
        self.fg.generator = name
        text = rendered_generator()
        assert name in text
        assert self.programname not in text

    def test_str(self):
        # str(podcast) should equal the non-minimized UTF-8 output of
        # rss_str() with the XML declaration included.
        actual = str(self.fg)
        expected = self.fg.rss_str(
            xml_declaration=True,
            encoding="UTF-8",
            minimize=False,
        )
        assert actual == expected

    def test_updated(self):
        # Covers the three states of last_updated: untouched, set to a
        # specific datetime, and disabled with False.
        custom_date = datetime.datetime(2016, 1, 1, 0, 10,
                                        tzinfo=dateutil.tz.tzutc())

        def build_date_element():
            # Render the feed and look up channel/lastBuildDate.
            channel = self.fg._create_rss().find("channel")
            return channel.find("lastBuildDate")

        # Untouched: the element is present
        assert build_date_element() is not None

        # Custom value: the element is present and parses back to our date
        self.fg.last_updated = custom_date
        element = build_date_element()
        assert element is not None
        assert dateutil.parser.parse(element.text) == custom_date

        # False: the element is omitted
        self.fg.last_updated = False
        element = build_date_element()
        assert element is None

    def test_AuthorEmail(self):
        # A single author known only by email goes into managingEditor and
        # nowhere else (per the RSS best practices section on dc:creator).
        email_only_author = Person(None, "*****@*****.**")
        self.fg.authors = [email_only_author]
        channel = self.fg._create_rss().find("channel")

        # managingEditor carries the email
        managing_editor = channel.find("managingEditor")
        assert managing_editor.text == self.fg.authors[0].email

        # neither dc:creator nor itunes:author is emitted
        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator is None
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author is None

    def test_AuthorName(self):
        # A single author known only by name goes into dc:creator and
        # itunes:author; managingEditor is left out.
        name_only_author = Person("Just a. Name")
        self.fg.authors = [name_only_author]
        channel = self.fg._create_rss().find("channel")

        # managingEditor is absent
        managing_editor = channel.find("managingEditor")
        assert managing_editor is None

        # dc:creator holds the name
        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator.text == self.fg.authors[0].name

        # itunes:author holds the name
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author.text == self.fg.authors[0].name

    def test_AuthorNameAndEmail(self):
        # A single author with name and email goes into managingEditor and
        # itunes:author; dc:creator is left out.
        full_author = Person("Both a name", "*****@*****.**")
        self.fg.authors = [full_author]
        channel = self.fg._create_rss().find("channel")

        # managingEditor is formatted as "email (name)"
        stored = self.fg.authors[0]
        expected_editor = stored.email + " (" + self.fg.authors[0].name + ")"
        self.assertEqual(expected_editor, channel.find("managingEditor").text)

        # dc:creator is absent
        dc_creator = channel.find("{%s}creator" % self.nsDc)
        assert dc_creator is None

        # itunes:author holds the bare name
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author.text == self.fg.authors[0].name

    def test_multipleAuthors(self):
        # With several authors the feed should list each one in its own
        # dc:creator, combine the names in itunes:author, and omit
        # managingEditor.
        first = Person("Multiple", "*****@*****.**")
        second = Person("Are", "*****@*****.**")
        people = (first, second)
        self.fg.authors = [first, second]
        channel = self.fg._create_rss().find("channel")

        # dc:creator: one element per author, each with name and email
        creator_texts = [
            element.text
            for element in channel.findall("{%s}creator" % self.nsDc)
        ]
        assert len(creator_texts) == 2
        for person, text in zip(people, creator_texts):
            assert person.name in text
            assert person.email in text

        # itunes:author: a single element with names but no emails
        itunes_author = channel.find("{%s}author" % self.nsItunes)
        assert itunes_author is not None
        combined = itunes_author.text
        for person in people:
            assert person.name in combined
            assert person.email not in combined

        # managingEditor must not appear
        assert channel.find("managingEditor") is None

    def test_authorsInvalidValue(self):
        # The helper assigns a bare Person to authors, which is expected to
        # raise TypeError.
        with self.assertRaises(TypeError):
            self.do_authorsInvalidValue()

    def do_authorsInvalidValue(self):
        # Helper for test_authorsInvalidValue: assigns a single Person to
        # authors instead of a list of them.
        lone_person = Person("Opsie", "*****@*****.**")
        self.fg.authors = lone_person


    def test_webMaster(self):
        # Email only: webMaster contains just the address
        self.fg.web_master = Person(None, "*****@*****.**")
        web_master_el = self.fg._create_rss().find("channel").find("webMaster")
        assert web_master_el.text == self.fg.web_master.email

        # A person without an email address is refused with ValueError.
        # The Person is built outside the context manager so that only the
        # assignment itself is covered by the expectation.
        nameless = Person("Mr. No Email Address")
        with self.assertRaises(ValueError):
            self.fg.web_master = nameless

        # Name and email: webMaster is formatted as "email (name)"
        self.fg.web_master = Person("Both a name", "*****@*****.**")
        channel = self.fg._create_rss().find("channel")
        expected = self.fg.web_master.email + \
            " (" + self.fg.web_master.name + ")"
        self.assertEqual(expected, channel.find("webMaster").text)

    def test_categoryWithoutSubcategory(self):
        # A category without subcategory yields one itunes:category element
        # with no nested itunes:category inside it.
        arts = Category("Arts")
        self.fg.category = arts
        channel = self.fg._create_rss().find("channel")

        top_level = channel.find("{%s}category" % self.nsItunes)
        assert top_level is not None
        self.assertEqual(top_level.get("text"), arts.category)

        nested = top_level.find("{%s}category" % self.nsItunes)
        assert nested is None

    def test_categoryWithSubcategory(self):
        # A category with subcategory yields an itunes:category element
        # containing a nested itunes:category for the subcategory.
        arts_food = Category("Arts", "Food")
        self.fg.category = arts_food
        channel = self.fg._create_rss().find("channel")

        top_level = channel.find("{%s}category" % self.nsItunes)
        assert top_level is not None

        nested = top_level.find("{%s}category" % self.nsItunes)
        assert nested is not None
        self.assertEqual(nested.get("text"), arts_food.subcategory)

    def test_categoryChecks(self):
        # A plain tuple is not accepted in place of a Category instance;
        # the assignment is expected to raise TypeError.
        not_a_category = ("Arts", "Food")
        with self.assertRaises(TypeError):
            self.fg.category = not_a_category

    def test_explicitIsExplicit(self):
        # explicit=True must render an itunes:explicit element holding one
        # of the affirmative values (compared case-insensitively).
        self.fg.explicit = True
        channel = self.fg._create_rss().find("channel")
        explicit_el = channel.find("{%s}explicit" % self.nsItunes)
        assert explicit_el is not None
        affirmative = ("yes", "explicit", "true")
        assert explicit_el.text.lower() in affirmative, \
            "itunes:explicit was %s, expected yes, explicit or true" \
            % explicit_el.text

    def test_explicitIsClean(self):
        # explicit=False must render an itunes:explicit element holding one
        # of the negative values (compared case-insensitively).
        self.fg.explicit = False
        channel = self.fg._create_rss().find("channel")
        explicit_el = channel.find("{%s}explicit" % self.nsItunes)
        assert explicit_el is not None
        negative = ("no", "clean", "false")
        assert explicit_el.text.lower() in negative, \
            "itunes:explicit was %s, expected no, clean or false" \
            % explicit_el.text

    def test_mandatoryValues(self):
        # For every mandatory property, build a Podcast that has all the
        # mandatory properties set except that one, and expect
        # _create_rss() to raise ValueError.
        mandatory_properties = {"description", "title", "link", "explicit"}

        # (mandatory property, attribute that provides it) in the order the
        # attributes are assigned. The fixture stores each value under the
        # same attribute name as the Podcast uses.
        attribute_for = (
            ("description", "description"),
            ("title", "name"),
            ("link", "website"),
            ("explicit", "explicit"),
        )

        for omitted in mandatory_properties:
            podcast = Podcast()
            for prop, attribute in attribute_for:
                if prop != omitted:
                    setattr(podcast, attribute, getattr(self, attribute))
            try:
                with self.assertRaises(ValueError):
                    podcast._create_rss()
            except AssertionError as error:
                # Re-raise with the name of the property that was left out
                raise_from(AssertionError(
                    "The test failed for %s" % omitted), error)

    def test_withholdFromItunesOffByDefault(self):
        # The fixture never sets withhold_from_itunes, so it must be falsy.
        withheld = self.fg.withhold_from_itunes
        assert not withheld

    def test_withholdFromItunes(self):
        # itunes:block should appear with "yes" when withholding is on and
        # be absent when it is off.
        def find_block():
            # Render the feed and look up channel/itunes:block.
            channel = self.fg._create_rss().find("channel")
            return channel.find("{%s}block" % self.nsItunes)

        self.fg.withhold_from_itunes = True
        block = find_block()
        assert block is not None
        self.assertEqual(block.text.lower(), "yes")

        self.fg.withhold_from_itunes = False
        block = find_block()
        assert block is None

    def test_modifyingSkipDaysAfterwards(self):
        # An invalid day added in place to skip_days is expected to surface
        # as a ValueError when the feed is rendered.
        bogus_day = "Unrecognized day"
        self.fg.skip_days.add(bogus_day)
        with self.assertRaises(ValueError):
            self.fg.rss_str()

        # Once the bad entry is removed, rendering succeeds again
        self.fg.skip_days.remove(bogus_day)
        self.fg.rss_str()

    def test_modifyingSkipHoursAfterwards(self):
        # An invalid hour added in place to skip_hours is expected to
        # surface as a ValueError when the feed is rendered.
        bogus_hour = 26
        self.fg.skip_hours.add(bogus_hour)
        with self.assertRaises(ValueError):
            self.fg.rss_str()

        # Once the bad entry is removed, rendering succeeds again
        self.fg.skip_hours.remove(bogus_hour)
        self.fg.rss_str()

    # Tests for xslt
    def test_xslt_str(self):
        # Run the shared xslt checks against the output of rss_str().
        self.help_test_xslt_using(
            lambda **kwargs: self.fg.rss_str(**kwargs))

    def test_xslt_file(self):
        # Run the shared xslt checks against the feed as returned by
        # getRssFeedFileContents().
        self.help_test_xslt_using(
            lambda **kwargs: self.getRssFeedFileContents(self.fg, **kwargs))

    def help_test_xslt_using(self, generated_feed):
        """Check xslt handling on feeds produced by the given function.

        generated_feed is called with the keyword arguments to use when
        generating the feed and must return the feed's contents.
        """
        xslt_path = "http://example.com/mystylesheet.xsl"
        xslt_pi = "<?xml-stylesheet"

        # Every check is repeated for these three ways of generating the feed
        option_sets = (
            {},
            {"minimize": True},
            {"xml_declaration": False},
        )

        # With xslt unset there is no stylesheet processing instruction
        self.fg.xslt = None
        for options in option_sets:
            assert xslt_pi not in generated_feed(**options)

        self.fg.xslt = xslt_path

        # With xslt set the processing instruction is present...
        for options in option_sets:
            assert xslt_pi in generated_feed(**options)

        # ...and so is the stylesheet's URL
        for options in option_sets:
            assert xslt_path in generated_feed(**options)

    def test_imageWarningNoExt(self):
        # An image URL without a file extension should trigger exactly one
        # NotSupportedByItunesWarning and still be stored.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            # Nothing recorded before we touch the image
            self.assertEqual(len(caught), 0)

            no_ext = "http://static.example.com/images/logo"
            self.fg.image = no_ext

            # Exactly one warning, of the expected category
            self.assertEqual(1, len(caught))
            latest = caught.pop()
            assert issubclass(latest.category, NotSupportedByItunesWarning)

            # The value was accepted regardless
            self.assertEqual(no_ext, self.fg.image)

    def test_imageWarningBadExt(self):
        # An image URL ending in .gif should trigger exactly one
        # NotSupportedByItunesWarning and still be stored.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")

            bad_ext = "http://static.example.com/images/logo.gif"
            self.fg.image = bad_ext

            # Exactly one warning, of the expected category
            self.assertEqual(1, len(caught))
            latest = caught.pop()
            assert issubclass(latest.category, NotSupportedByItunesWarning)

            # The value was accepted regardless
            self.assertEqual(bad_ext, self.fg.image)

    def test_imageNoWarningWithGoodExt(self):
        # Image URLs ending in jpg, png or jpeg should be stored without
        # any warning being recorded.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")

            for extension in ("jpg", "png", "jpeg"):
                good_ext = "http://static.example.com/images/logo." + extension
                self.fg.image = good_ext

                # No warning so far
                failure_note = "Extension %s raised warnings (%s)" \
                    % (extension, caught)
                self.assertEqual(0, len(caught), failure_note)

                # The value was stored
                self.assertEqual(good_ext, self.fg.image)
Exemplo n.º 34
0
    if next((x for x in session_items if x['CID'] == cid), None):
        print(f'WARNING: duplicate CID {cid} for new item: {title}')

# Write the new sessions JSON file.
# The new items are placed ahead of the previously known session items.
updated_session_items = new_items + session_items

# (Re)build every item's link from its CID, wrapped in the configured
# prefix and suffix.
for item in updated_session_items:
    item['link'] = f'{ipfs_prefix}{item["CID"]}{ipfs_suffix}'

# Overwrites whatever is currently stored at sessions_filename.
with open(sessions_filename, 'w') as outfile:
    json.dump(updated_session_items, outfile, indent=2)

print('>>> wrote fresh sessions.json file')

# Build the podcast that the new RSS file is generated from.
p = Podcast()

p.name = "The Objectivism Seminar"
# NOTE(review): "&amp;" looks like an HTML-escaped "&" left over from
# copy/paste; the intended category is presumably "Society & Culture".
# Confirm against the category names podgen accepts.
p.category = Category("Society &amp; Culture", "Philosophy")
p.language = "en-US"
p.explicit = True
p.description = (
    "A weekly online conference call to systematically study " +
    "the philosophy of Objectivism via the works of prominent Rand scholars.")
p.website = "https://www.ObjectivismSeminar.com"
p.image = "https://www.ObjectivismSeminar.com/assets/images/atlas-square.jpg"
p.feed_url = "https://www.ObjectivismSeminar.com/archives/rss"
p.authors = [Person("Greg Perkins, Host", "*****@*****.**")]
p.owner = Person("Greg Perkins", "*****@*****.**")

p.episodes += [
Exemplo n.º 35
0
class Ximalaya():
    """Scrape a Ximalaya album and export it as a podcast RSS file."""

    def __init__(self, album_id):
        """Store the album id and precompute the URLs and request headers.

        :param album_id: identifier interpolated into the album URLs and
            into the name of the generated RSS file
        """
        self.podcast = None
        self.album_id = album_id
        self.album_list_api = "http://www.ximalaya.com/revision/play/album?albumId={}&pageNum=1&sort=1&pageSize=999".format(
            album_id)
        self.album_url = 'http://www.ximalaya.com/album/%s' % album_id
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url,
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Build the podcast from the album page and its track list, then
        write it to ``ximalaya/<album_id>.rss``.

        The album page is parsed for the title, description and cover; the
        track list comes from ``album_list_api``, and each track's own page
        is fetched for its summary and publication time. A track that
        raises while being processed is reported on stdout and the loop
        carries on with the next track.
        """
        page = requests.get(self.album_url, headers=self.header)
        soup = BeautifulSoup(page.content, "lxml")

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = soup.find('h1', 'title').get_text()
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url
        self.podcast.copyright = 'cc-by'
        self.podcast.description = soup.find('div', 'album-intro').get_text()
        self.podcast.language = 'cn'
        # The cover URL may carry a "!..." suffix; keep the part before it
        self.podcast.image = soup.find(
            'div', 'album-info').find('img').get('src').split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        album_list_content = requests.get(self.album_list_api,
                                          headers=self.header).content
        album_list_data = json.loads(album_list_content.decode('utf-8'))
        tracks = album_list_data['data']['tracksAudioPlay']
        count = len(tracks)
        for each in tracks:
            try:
                page_info = requests.get('http://www.ximalaya.com/%s' %
                                         each['trackUrl'],
                                         headers=self.header)
                soup_info = BeautifulSoup(page_info.content, "lxml")
                episode = self.podcast.add_episode()
                episode.id = str(each['index'])
                episode.title = each['trackName']
                # Function-call form: valid on Python 3 and, with a single
                # argument, prints the same text on Python 2.
                print(self.podcast.name + '=====' + each['trackName'])
                image = each['trackCoverPath'].split('!')[0]
                # .gif and .bmp covers are replaced by the album cover
                if image.endswith(('.gif', '.bmp')):
                    episode.image = self.podcast.image
                else:
                    episode.image = image
                intro = soup_info.find('article', 'intro')
                if intro:
                    # The gbk round trip drops every character that gbk
                    # cannot encode
                    episode.summary = intro.get_text().encode(
                        'gbk', 'ignore').decode('gbk')
                else:
                    episode.summary = each['trackName']
                episode.link = 'http://www.ximalaya.com/%s' % each['albumUrl']
                episode.authors = [Person("forecho", '*****@*****.**')]
                episode.publication_date = self.reduction_time(
                    soup_info.find('span', 'time').get_text())
                episode.media = Media(each['src'], each['duration'])
                # Reverse numbering: index 1 gets the highest position
                episode.position = count - each['index'] + 1
            except Exception as e:
                # Deliberately best-effort: report the failing track and
                # keep going.
                print('异常:', e)
                print('异常 URL:',
                      'http://www.ximalaya.com/%s' % each['trackUrl'])

        # Write the feed to disk
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id, minimize=True)

    @staticmethod
    def reduction_time(created_date):
        """Parse a ``YYYY-MM-DD HH:MM:SS`` string into an aware datetime.

        :param created_date: timestamp text in ``%Y-%m-%d %H:%M:%S`` format
        :return: the parsed time with seconds dropped and ``pytz.utc``
            attached as its timezone (the wall-clock value is not shifted)
        """
        timestamp = datetime.strptime(created_date, "%Y-%m-%d %H:%M:%S")
        return timestamp.replace(second=0, microsecond=0, tzinfo=pytz.utc)
Exemplo n.º 36
0

# Download the page at base_url and parse it with the lxml parser.
content = requests.get(base_url).content

soup = BeautifulSoup(content, features="lxml")


# Collect the absolute URLs of the pages to visit: the href of each of the
# first 10 anchors inside #listProgramsContent, prefixed with base_href.
urls_to_follow = []
for anchor in soup.select("#listProgramsContent a")[:10]:
    urls_to_follow.append(base_href + anchor.get("href"))


# Podcast-level metadata; the website is the page the links were read from.
p = Podcast(
   name="Alta Tensão",
   description="Alta Tensão com António Freitas",
   image="https://cdn-images.rtp.pt/EPG/radio/imagens/1068_10159_53970.jpg",
   website=base_url,
   explicit=True,
)

episodes = []

for url in urls_to_follow:
    content = requests.get(url).content
    soup = BeautifulSoup(content, features="lxml")
    res = re.search(b'file : "(.+?)",\\n', content)
    title = soup.select("b.vod-title")[0].text
    date = soup.select(".vod-data p span.episode-date")[0].text
    media_url = res.groups()[0].decode()
    head = requests.head(url)
    if '\n' in title:
Exemplo n.º 37
0
class Ximalaya():
    """Build a podcast RSS file for a Ximalaya album from its JSON endpoints."""

    def __init__(self, album_id):
        """Store the album id, the endpoint templates and the request headers.

        :param album_id: identifier interpolated into the endpoint URLs and
            into the name of the generated RSS file
        """
        self.podcast = None
        self.album_id = album_id
        self.page_size = 30
        self.album_info_url = "https://www.ximalaya.com/revision/album?albumId={}"
        self.album_list_url = "https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&pageSize={}"
        self.detail_url = "https://mobile.ximalaya.com/v1/track/baseInfo?device=android&trackId={}"
        self.album_url = "https://www.ximalaya.com/album/{}"
        self.header = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self.album_url.format(self.album_id),
            'Cookie': '_ga=GA1.2.1628478964.1476015684; _gat=1',
        }

    def album(self):
        """Fetch the album and all its track pages, then write the feed to
        ``ximalaya/<album_id>.rss``.

        Nothing is built or written unless the album-info response carries
        ``ret == 200``. A track that raises while being processed is
        reported on stdout (with a traceback) and the loop carries on with
        the next track.
        """
        album_info = requests.get(self.album_info_url.format(self.album_id),
                                  headers=self.header).content
        album_info_content = json.loads(album_info.decode('utf-8'))
        if album_info_content['ret'] != 200:
            return
        album_info_data = album_info_content['data']

        # Initialise the podcast
        self.podcast = Podcast()
        self.podcast.name = album_info_data['mainInfo']['albumTitle']
        self.podcast.authors.append(
            Person("Powered by forecho", '*****@*****.**'))
        self.podcast.website = self.album_url.format(self.album_id)
        self.podcast.copyright = 'cc-by'
        # Fall back to the album title when there is no intro text
        if album_info_data['mainInfo']['richIntro']:
            self.podcast.description = album_info_data['mainInfo'][
                'richIntro']
        else:
            self.podcast.description = self.podcast.name
        self.podcast.language = 'cn'
        # The cover is scheme-less and may carry a "!..." suffix
        self.podcast.image = 'https:' + album_info_data['mainInfo'][
            'cover'].split('!')[0]
        self.podcast.feed_url = 'http://podcast.forecho.com/ximalaya/%s.rss' % self.album_id
        self.podcast.category = Category('Technology', 'Podcasting')
        self.podcast.explicit = False
        self.podcast.complete = False
        self.podcast.owner = Person("forecho", '*****@*****.**')

        # float() forces true division on Python 2 as well, so the number
        # of pages is exact on both major versions and no "+1" fudge (which
        # requested one page too many on Python 3) is needed.
        total_tracks = album_info_data['tracksInfo']['trackTotalCount']
        page_count = int(math.ceil(total_tracks / float(self.page_size)))

        for page_num in range(1, page_count + 1):
            album_list = requests.get(self.album_list_url.format(
                self.album_id, page_num, self.page_size),
                                      headers=self.header).content
            album_list_content = json.loads(album_list.decode('utf-8'))
            tracks = album_list_content['data']['tracksAudioPlay']
            # NOTE(review): count is the number of tracks on THIS page; if
            # each['index'] numbers tracks across the whole album, the
            # position computed below goes negative after the first page.
            # Confirm what 'index' means before relying on position.
            count = len(tracks)
            for each in tracks:
                try:
                    detail = requests.get(self.detail_url.format(
                        each['trackId']),
                                          headers=self.header).content
                    detail_content = json.loads(detail.decode('utf-8'))
                    episode = self.podcast.add_episode()
                    episode.id = str(each['index'])
                    episode.title = each['trackName']
                    print(self.podcast.name + '=====' + each['trackName'])
                    image = each['trackCoverPath'].split('!')[0]
                    # Only .png/.jpg covers are used; anything else falls
                    # back to the album cover
                    if image[-4:] == '.png' or image[-4:] == '.jpg':
                        episode.image = 'https:' + image
                    else:
                        episode.image = self.podcast.image
                    if 'intro' in detail_content:
                        episode.summary = detail_content['intro'].replace(
                            '\r\n', '')
                    else:
                        episode.summary = each['trackName']
                    episode.link = 'http://www.ximalaya.com%s' % each[
                        'albumUrl']
                    episode.authors = [
                        Person("forecho", '*****@*****.**')
                    ]
                    episode.publication_date = self.reduction_time(
                        detail_content['createdAt'])
                    episode.media = Media(each['src'], each['duration'])
                    episode.position = count - each['index'] + 1
                except Exception as e:
                    # Deliberately best-effort: report the failing track
                    # and keep going.
                    print('异常:', e)
                    print('异常 URL:',
                          'https://www.ximalaya.com%s' % each['trackUrl'])
                    traceback.print_exc()

        # Write the feed to disk
        self.podcast.rss_file('ximalaya/%s.rss' % self.album_id,
                              minimize=True)

    @staticmethod
    def reduction_time(time):
        """Convert a millisecond Unix timestamp to an aware UTC datetime.

        The conversion is done directly in UTC. Converting to the host's
        local time first and then labelling the result as UTC would shift
        every publication date by the host's UTC offset.

        :param time: Unix timestamp in milliseconds
        :return: the corresponding UTC time, truncated to whole minutes,
            with ``pytz.utc`` as its timezone
        """
        timestamp = datetime.fromtimestamp(time / 1000, tz=pytz.utc)
        return timestamp.replace(second=0, microsecond=0)
Exemplo n.º 38
0
def genero_feed(episodesList):
    """Write the "NECST Tech Time" feed for the given episodes to rssfile.

    Does nothing when episodesList is empty or otherwise falsy. Each entry
    is indexed positionally: [1] title, [2] link, [3] media URL,
    [4] publication date.
    """
    if not episodesList:
        return

    # Create a new podcast
    feed = Podcast()

    feed.name = "NECST Tech Time"
    feed.description = "The NECSTLab (Novel, Emerging Computing System Technologies Laboratory) is a laboratory inside DEIB department of Politecnico di Milano, where there are a number of different research lines on advanced topics in computing systems: from architectural characteristics, to hardware-software codesign methodologies, to security and dependability issues of complex system architectures (scaling from mobile devices to large virtualized datacenters)."
    feed.website = "http://www.poliradio.it/podcast/programmi/34/necst-tech-time"
    feed.explicit = True
    feed.image = "https://rss.draghetti.it/necst_image.jpg"
    feed.feed_url = "https://rss.draghetti.it/necstpodcast.xml"
    feed.copyright = "Poli Radio"
    feed.language = "it-IT"

    for details in episodesList:
        entry = Episode()

        # Title and link are reduced to their ASCII characters
        entry.title = details[1].encode("ascii", "ignore")
        entry.link = details[2].encode("ascii", "ignore")

        # The size is a fixed estimate based on the episodes analysed so
        # far, not the real size of the file
        entry.media = Media(details[3], 30000000, type="audio/x-m4a", duration=None)
        entry.publication_date = details[4]

        feed.episodes.append(entry)

    # Write the (non-minimized) feed to the file named by rssfile
    feed.rss_file(rssfile, minimize=False)
Exemplo n.º 39
0
    def generate_podcast(self, feed_name: str) -> str:
        """
        Create podcast XML from the ``.mp3`` files found (recursively) under
        ``<search_dir>/<feed_name>``. Taken from
        https://podgen.readthedocs.io/en/latest/usage_guide/podcasts.html

        Each episode is described by a ``.json`` file stored next to its
        ``.mp3``. When that file is missing, the values derived here are
        written to it so they can be edited later.

        :param self: PodcastService class
        :param feed_name: name of the feed and the sub-directory for files
        :return:  string of the podcast
        """
        search_root = Path(self.search_dir)

        # Initialize the feed
        p = Podcast()

        # Required fields
        p.name = f'{feed_name} Archive'
        p.description = 'Stuff to listen to later'
        p.website = self.base_url
        p.complete = False

        # Optional
        p.language = 'en-US'
        p.feed_url = f'{p.website}/feeds/{feed_name}/rss'
        p.explicit = False
        p.authors.append(Person("Anthology"))

        def public_url(file_path: Path) -> str:
            # URL of a file: the website followed by the file's path
            # relative to the search directory. str.lstrip() must not be
            # used for this: it strips a *set of characters*, so it also
            # eats the start of any feed or file name made of characters
            # that occur in search_dir.
            try:
                relative = file_path.relative_to(search_root).as_posix()
            except ValueError:
                # Not located under search_root; use the path as it is
                relative = file_path.as_posix().lstrip('/')
            return f'{p.website}/{relative}'.replace(' ', '+')

        for path in Path(f'{self.search_dir}/{feed_name}').glob('**/*.mp3'):
            filepath = str(path)
            episode = p.add_episode()

            # Attempt to load saved metadata. with_suffix() swaps only the
            # final extension, unlike str.replace('.mp3', ...), which also
            # rewrites any ".mp3" occurring elsewhere in the path.
            metadata_path = path.with_suffix('.json')
            try:
                with open(metadata_path) as metadata_file:
                    metadata = json.load(metadata_file)
            except FileNotFoundError:
                metadata = {}
            except JSONDecodeError:
                metadata = {}
                self.logger.error(f'Failed to read {metadata_path}')

            # Build the episode based on either the saved metadata or the
            # file details. path.stem is the file name without its final
            # extension; rstrip('.mp3') would also strip trailing '.', 'm',
            # 'p' and '3' characters from the name itself
            # (e.g. "episode3.mp3" -> "episode").
            episode.title = metadata.get('title', path.stem)
            episode.summary = metadata.get('summary',
                                           htmlencode('Some Summary'))
            if 'link' in metadata:
                episode.link = metadata.get('link')
            if 'authors' in metadata:
                episode.authors = [
                    Person(author) for author in metadata.get('authors')
                ]
            # Without a saved date, use the file's modification time (UTC)
            if 'publication_date' in metadata:
                episode.publication_date = isoparse(
                    metadata.get('publication_date'))
            else:
                episode.publication_date = datetime.fromtimestamp(
                    os.path.getmtime(filepath), tz=pytz.utc)
            episode.media = Media(public_url(path), os.path.getsize(filepath))
            episode.media.populate_duration_from(filepath)

            if "image" in metadata:
                episode.image = metadata.get('image')
            else:
                # Use the first image that sits next to the .mp3, if any
                for ext in ['.jpg', '.png']:
                    image_path = path.with_suffix(ext)
                    if image_path.is_file():
                        episode.image = public_url(image_path)
                        break

            # Save the metadata for future editing
            if not metadata_path.exists():
                metadata = {
                    'title': episode.title,
                    'summary': episode.summary,
                    'publication_date': episode.publication_date,
                    'authors': episode.authors
                }
                # NOTE(review): default=str stores each author as
                # str(Person); on the next run that text is passed to
                # Person(author) as a name. Confirm this round-trips.
                with open(metadata_path, 'w') as outFile:
                    json.dump(metadata, outFile, indent=2, default=str)

        return p.rss_str()
Exemplo n.º 40
0
def main():
    """Create an example podcast and print it or save it to a file.

    Reads exactly one command-line argument from ``sys.argv``:

    * ``rss`` -- generate the example feed and print it to stdout.
    * ``<file>.rss`` -- generate the example feed and write it to that file.

    With any other argument list, a usage message is printed and the
    interpreter exits (with the default exit status, as before).
    """
    # There must be exactly one argument, and it must end with "rss".
    if len(sys.argv) != 2 or not sys.argv[1].endswith('rss'):
        # Invalid usage, print help message.
        # print_enc is just a custom function which functions like print,
        # except it deals with byte arrays properly.
        print_enc('Usage: %s ( <file>.rss | rss )' % 'python -m podgen')
        print_enc('')
        print_enc('  rss              -- Generate RSS test output and print it to stdout.')
        print_enc('  <file>.rss       -- Generate RSS test feed and write it to file.rss.')
        print_enc('')
        # sys.exit() instead of the bare exit() helper: the latter is injected
        # by the ``site`` module and is not guaranteed to exist (python -S,
        # frozen or embedded interpreters).
        sys.exit()

    # Remember what type of feed the user wants
    arg = sys.argv[1]

    # Imported here rather than at module level, as in the original code.
    from podgen import Podcast, Person, Media, Category, htmlencode

    # Initialize the feed
    p = Podcast()
    p.name = 'Testfeed'
    p.authors.append(Person("Lars Kiesow", "*****@*****.**"))
    p.website = 'http://example.com'
    p.copyright = 'cc-by'
    p.description = 'This is a cool feed!'
    p.language = 'de'
    p.feed_url = 'http://example.com/feeds/myfeed.rss'
    p.category = Category('Technology', 'Podcasting')
    p.explicit = False
    p.complete = False
    p.new_feed_url = 'http://example.com/new-feed.rss'
    p.owner = Person('John Doe', '*****@*****.**')
    p.xslt = "http://example.com/stylesheet.xsl"

    # Add a single example episode to the feed
    e1 = p.add_episode()
    e1.id = 'http://lernfunk.de/_MEDIAID_123#1'
    e1.title = 'First Element'
    e1.summary = htmlencode('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
            aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
            mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
            domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
            occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
            verba <3.''')
    e1.link = 'http://example.com'
    e1.authors = [Person('Lars Kiesow', '*****@*****.**')]
    e1.publication_date = datetime.datetime(
        2014, 5, 17, 13, 37, 10, tzinfo=pytz.utc)
    e1.media = Media(
        "http://example.com/episodes/loremipsum.mp3",
        454599964,
        duration=datetime.timedelta(hours=1, minutes=32, seconds=19),
    )

    # Should we just print out, or write to file?
    if arg == 'rss':
        # Print
        print_enc(p.rss_str())
    else:
        # Write to file. The argument was validated above to end with "rss",
        # so anything that is not exactly "rss" is treated as a file name.
        p.rss_file(arg, minimize=True)