Пример #1
0
def main():
    """Convert a scraped tweet listing into an RSS 2.0 feed on stdout.

    The page markup comes from the file named by the first command-line
    argument when one is supplied, otherwise from ``retrieve_url()``.
    Every ``<table>`` whose class attribute contains the word ``tweet``
    becomes one RSS item:

    * title       -- text of the tweet body (``div.dir-ltr``)
    * link        -- first link in the body that does not point at
                     ``https://www.twitter.com`` (``None`` if there is none)
    * description -- a link to the author's profile followed by the body
    * pubDate     -- ``parse_date()`` of the timestamp anchor's text
    * guid        -- the ``name`` attribute of the timestamp anchor

    The assembled feed is pretty-printed with ``xmlpp`` and printed.
    """
    if len(sys.argv) > 1:
        page = retrieve_file(sys.argv[1])
    else:
        page = retrieve_url()

    soup = BeautifulSoup(page)

    rss_items = []
    tweets = soup.findAll('table', {'class': re.compile(r'\btweet\b')})

    for tweet in tweets:
        author = tweet.find('strong', {'class': 'fullname'}).text
        username = tweet.find('span', {'class': 'username'}).text
        content = tweet.find('div', {'class': 'dir-ltr'})

        # One anchor inside the timestamp cell carries both the tweet id
        # (its ``name`` attribute) and the human-readable date (its text).
        timestamp_anchor = tweet.find('td', {'class': 'timestamp'}).find('a')
        tweet_id = timestamp_anchor['name']
        date = parse_date(timestamp_anchor.text)

        first_link = None
        for link in content.findAll('a'):
            href = link.get('data-url') or link.get('href')
            if not href:
                # Anchor without any target: nothing to rewrite, and
                # calling startswith() on None would raise.
                continue

            # Site-relative links are made absolute so they work in a reader.
            if href.startswith('/'):
                href = 'https://www.twitter.com' + href

            # Rewrite in place; the modified markup ends up in the description.
            link['href'] = href

            if not first_link and not href.startswith('https://www.twitter.com'):
                first_link = href

        description = '<a href="https://www.twitter.com/%s">%s - %s</a><br/>%s' % (
            username, username, author, content)

        rss_items.append(PyRSS2Gen.RSSItem(
            title=content.text,
            link=first_link,
            description=description,
            pubDate=date,
            guid=PyRSS2Gen.Guid(tweet_id, isPermaLink=0),
        ))

    rss = PyRSS2Gen.RSS2(
        title='Starred Tweets',
        link='http://dropbox.kuijjer.com/starred_tweets.rss',
        description='Starred Tweets',
        lastBuildDate=datetime.datetime.now(),
        items=rss_items,
    )

    # Single-argument call form prints identically on Python 2 and 3.
    print(xmlpp.get_pprint(rss.to_xml()))
Пример #2
0
	def m_mniReformatClick( self, event ):
		'''
		Menu handler: replace the editor text with its pretty-printed form.

		The status bar is cleared first; the current contents of the main
		text control are then run through xmlpp.get_pprint and written
		back. The event argument is not used.
		'''
		# Drop any message left over from a previous action
		self.m_statusBar.SetStatusText('')
		# Read the buffer, prettify it with xmlpp, and store the result
		self.m_txtMain.SetValue(xmlpp.get_pprint(self.m_txtMain.GetValue()))
Пример #3
0
 def prettyPrintXML(xmlString):
     """Return ``xmlString`` as reformatted by ``xmlpp.get_pprint``."""
     formatted = xmlpp.get_pprint(xmlString)
     return formatted
Пример #4
0
 def testSpecialTokens(self):
     """Colons in tag names, attribute values and text survive formatting."""
     specialtokens = """<xml:test foo="b:ar">foo::bar:/adf32</xml:test>"""
     expected = """<xml:test foo="b:ar">\n    foo::bar:/adf32\n</xml:test>\n"""
     # assertEqual, not the deprecated assertEquals alias (removed in 3.12)
     self.assertEqual(xmlpp.get_pprint(specialtokens), expected)
Пример #5
0
 def testBasic(self):
     basic = """<xml><test><test>foo bar</test></test><test>foo</test></xml>"""
     result = """<xml>\n    <test>\n        <test>\n            foo bar\n        </test>\n    </test>\n    <test>\n        foo\n    </test>\n</xml>\n"""
     self.assertEquals(xmlpp.get_pprint(basic), result)
Пример #6
0
 def testEncodingWithCDATA(self):
     """The XML declaration and CDATA sections (empty or containing '<'/'>') are kept verbatim."""
     encodingWithCDATA = """<?xml version="1.0" encoding="UTF-8" ?><testcase><system-out><![CDATA[<fdaa>fda>]]></system-out><system-err><![CDATA[]]></system-err></testcase>"""
     expected = """<?xml version="1.0" encoding="UTF-8" ?>\n<testcase>\n    <system-out>\n        <![CDATA[<fdaa>fda>]]>\n    </system-out>\n    <system-err>\n        <![CDATA[]]>\n    </system-err>\n</testcase>\n"""
     # assertEqual, not the deprecated assertEquals alias (removed in 3.12)
     self.assertEqual(xmlpp.get_pprint(encodingWithCDATA), expected)
Пример #7
0
    def addToLibraryFile(libraryFile, epubData):
        """
        Add an EPUB to an ATOM library file, keeping entries sorted.

        The file is read, pretty-printed with xmlpp so that it can be
        processed line by line, updated, and written back in place.
        Entries are ordered by author and then by title; an existing entry
        with the same author and title is replaced rather than duplicated.

        libraryFile -- path of the ATOM file to update.
        epubData    -- mapping providing the keys "identifier", "title",
                       "creator", "published", "language", "subject",
                       "description", "authorHash" and "titleHash".

        NOTE(review): values are interpolated into the XML without
        escaping; presumably the caller supplies XML-safe text -- confirm.
        """
        ### TODO: Finish implementing functionality
        from time import gmtime  # local import: only this function needs it

        newAuthor = epubData["creator"]
        newTitle = epubData["title"]

        baseEntry = [
            "    <entry>",
            "        <id>{0}</id>".format(epubData["identifier"]),
            "        <title>{0}</title>".format(epubData["title"]),
            "        <author>",
            "            <name>{0}</name>".format(epubData["creator"]),
            "        </author>",
            '        <content type="xhtml">',
            '            <div xmlns="http://www.w3.org/1999/xhtml">Published: {0}, Language: {1}, Subject: {2}</div>'.format(
                epubData["published"], epubData["language"], epubData["subject"]
            ),
            "        </content>",
            "        <summary>{0}</summary>".format(epubData["description"]),
            # Atom requires an RFC 3339 timestamp; the "Z" suffix means UTC,
            # so the time has to come from gmtime(), not local time.
            "        <updated>{0}</updated>".format(
                strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
            ),
            '        <link type="application/epub+zip" href="{0}/{1}.epub" />'.format(
                epubData["authorHash"], epubData["titleHash"]
            ),
            '        <link rel="http://opds-spec.org/opds-cover-image-thumbnail" type="image/jpeg" title="cover thumbnail" href="{0}/{1}_tn.jpg" />'.format(
                epubData["authorHash"], epubData["titleHash"]
            ),
            '        <link rel="http://opds-spec.org/opds-cover-image" type="image/jpeg" title="cover image" href="{0}/{1}.jpg" />'.format(
                epubData["authorHash"], epubData["titleHash"]
            ),
            '        <link rel="x-stanza-cover-image-thumbnail" type="image/jpeg" href="{0}/{1}_tn.jpg" />'.format(
                epubData["authorHash"], epubData["titleHash"]
            ),
            '        <link rel="x-stanza-cover-image" type="image/jpeg" href="{0}/{1}.jpg" />'.format(
                epubData["authorHash"], epubData["titleHash"]
            ),
            "    </entry>",
        ]

        with open(libraryFile, "r") as libFile:
            atom = libFile.read()

        # Pretty print and standardize the ATOM file to make parsing a little easier
        atom = xmlpp.get_pprint(atom)
        atom = atom.replace("\r\n", "\n")
        entries = atom.split("\n")

        # Consider using xml rather than RegEx to parse the file
        newKey = (newAuthor, newTitle)
        placed = False
        for i, line in enumerate(entries):
            if line.strip() != "<entry>":
                continue

            # Check to find the Alpha location in the file.
            # NOTE(review): the +1 / +7 offsets assume a fixed line layout
            # of the pretty-printed entry that does not match baseEntry's
            # own line order -- confirm against the xmlpp output in use.
            match = re.search("<.*>(.*)</.*>", entries[i + 1])
            curTitle = match.groups()[0]
            match = re.search("<.*>(.*)</.*>", entries[i + 7])
            curAuthor = match.groups()[0]
            curKey = (curAuthor, curTitle)

            if newKey > curKey:
                # New entry sorts after this one; keep scanning.
                continue

            if newKey == curKey:
                # Same book already listed: replace the whole existing entry.
                end = i + 1
                while entries[end].strip() != "</entry>":
                    end += 1
                entries[i : end + 1] = baseEntry
            else:
                # New entry sorts before this one: insert in front of it.
                # An empty slice [i:i] always inserts and never overwrites.
                entries[i:i] = baseEntry
            placed = True
            break

        if not placed:
            # The new entry sorts after every existing one (or the feed has
            # no entries yet): put it just before the closing feed tag so
            # it is not silently dropped. If no such line exists the file
            # is left without the entry, as before.
            for i in range(len(entries) - 1, -1, -1):
                if entries[i].strip() == "</feed>":
                    entries[i:i] = baseEntry
                    break

        # Now write the file out
        atom = "\n".join(entries)
        # Drop a single leading filler character, but never the "<" that
        # starts the markup -- removing that would corrupt the document.
        if not atom.startswith("<"):
            atom = atom[1:]
        with open(libraryFile, "w") as f:
            f.write(atom)