Exemplo n.º 1
0
def importRawFeed(fileName, url):
    """Fetch a feed with ``readFeed(url)`` and save it verbatim to a file.

    The feed is fetched *before* the destination is opened, so a failed
    fetch no longer leaves ``fileName`` truncated to zero bytes, and the
    ``with`` block closes the handle even when the write raises.

    Args:
        fileName: Path of the file to create or overwrite (opened in 'w' mode).
        url: Value passed straight through to ``readFeed``.

    Returns:
        The raw feed exactly as returned by ``readFeed``.
    """
    rawFeed = readFeed(url)
    with open(fileName, 'w') as f:
        f.write(rawFeed)
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Raw feed saved in: %s" % fileName)
    return rawFeed
Exemplo n.º 2
0
def importRawFeed(fileName, url):
    """Fetch a feed with ``readFeed(url)`` and save it verbatim to a file.

    The feed is fetched *before* the destination is opened, so a failed
    fetch no longer leaves ``fileName`` truncated to zero bytes, and the
    ``with`` block closes the handle even when the write raises.

    Args:
        fileName: Path of the file to create or overwrite (opened in 'w' mode).
        url: Value passed straight through to ``readFeed``.

    Returns:
        The raw feed exactly as returned by ``readFeed``.
    """
    rawFeed = readFeed(url)
    with open(fileName, 'w') as f:
        f.write(rawFeed)
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Raw feed saved in: %s" % fileName)
    return rawFeed
Exemplo n.º 3
0
def processFeeds(f, entries, offset, totalEntries):
    """Render each feed entry as an HTML fragment and report progress.

    For every entry this writes, in order: the post date, the post content,
    its attachments, and a "links" section holding the list of likers and
    the comment thread. After each entry a progress line is printed.

    Args:
        f: Writable file-like object; only ``f.write`` is used.
        entries: Iterable of ElementTree elements, one per post.
        offset: Number of entries already processed before this call; added
            to the running count in the progress line.
        totalEntries: Overall entry count used as the progress denominator.

    Returns:
        None. Output goes to ``f`` and to standard output.
    """
    for entryNumber, entry in enumerate(entries, 1):
        # Buzz post starts
        f.write("<div class=\"buzz\">")

        # Post date
        f.write("<div class=\"postdate\"><b>" + _prettyDate(entry) + "</b></div>")

        # Post content
        post = entry.findall(namespace + 'content')[0].text
        f.write("<div class=\"post\">" + smart_str(post) + "</div>")

        _writeAttachments(f, entry)

        # Likers and comments both hang off the entry's <link> elements.
        links = entry.findall(namespace + 'link')
        f.write("<div class=\"links\">")
        _writeLikers(f, links)
        _writeComments(f, entry, links)

        # Closes the "links" div and the enclosing "buzz" div.
        f.write("</div></div><br>")

        processed = entryNumber + offset
        # Floor division keeps the percentage an integer on Python 2 and 3;
        # the parenthesised form prints identically under both.
        print("Progress: (%d/%d) %d%%" % (processed, totalEntries,
                                          processed * 100 // totalEntries))


def _prettyDate(element):
    """Return the date of ``element``'s 'updated' child as 'DD Month YYYY '."""
    # The text is split at 'T' and then on '-', i.e. it is expected to look
    # like 'YYYY-MM-DDT...'.
    dateParts = element.findall(namespace + 'updated')[0].text.split('T')[0].split('-')
    return "%s %s %s " % (dateParts[2], month_name[int(dateParts[1])], dateParts[0])


def _writeAttachments(f, entry):
    """Write the "attachments" div with one linked block per attachment."""
    f.write("<div class=\"attachments\">")
    activityObject = entry.findall(namespace_activity + 'object')[0]
    for attachment in activityObject.findall(namespace_buzz + 'attachment'):
        titles = attachment.findall(namespace + 'title')
        title = (titles[0].text + "<br>") if titles else ""
        contents = attachment.findall(namespace + 'content')
        content = contents[0].text if contents else ""
        # No break: when several 'alternate' links exist, the last one wins.
        href = ""
        for link in attachment.findall(namespace + 'link'):
            if link.get('rel') == 'alternate':
                href = link.get('href')
        f.write("<div class=\"attachment\"><b><a href=\"%s\">%s</a></b>%s</div>"
                % (href, smart_str(title), smart_str(content)))
    # Previously written as "</div" (no '>'), which produced malformed HTML.
    f.write("</div>")


def _writeLikers(f, links):
    """Write a "likers" div for every 'liked' link with a non-zero count."""
    # Drops the first and last character of namespace_buzz (presumably the
    # braces of a '{uri}' tag prefix) to build the rel value.
    likedRel = namespace_buzz[1:-1] + '#liked'
    for link in links:
        if link.get('rel') != likedRel:
            continue
        totalLikes = link.get(namespace_buzz + 'count')
        if int(totalLikes) == 0:
            # Nothing to show, so the likers feed is not fetched at all.
            continue
        # Replaces the first five characters of the href with 'http'.
        likeFeedUrl = 'http' + link.get('href')[5:] + '&max-results=100&bhu'
        likersTree = ET.XML(readFeed(likeFeedUrl))
        f.write("<div class=\"likers\"><strong>Liked by</strong> [%s]: " % totalLikes)
        likers = likersTree.findall(namespace_likers + 'entry')
        for position, liker in enumerate(likers):
            if position:
                f.write(", ")
            likerName = liker.findall(namespace_likers + 'displayName')[0].text
            likerUri = liker.findall(namespace_likers + 'profileUrl')[0].text
            f.write("<a href=\"%s\">%s</a>" % (likerUri, smart_str(likerName)))
        f.write("</div>")


def _writeComments(f, entry, links):
    """Write a "comments" div for every 'replies' link with a non-zero count."""
    for link in links:
        if link.get('rel') != 'replies':
            continue
        replyCount = link.get(namespace_thr + 'count')
        if int(replyCount) == 0:
            continue
        f.write("<div style=\"color:#111;\" class=\"comments\"><b>Comments(%s)</b><br>" % replyCount)
        # Replaces the first five characters of the href with 'http'.
        replyFeedUrl = 'http' + link.get('href')[5:] + '&bhu'
        comments = ET.XML(readFeed(replyFeedUrl)).findall(namespace + 'entry')
        for comment in comments:
            f.write("<div style=\"margin-left:20px;\" class=\"comment\">")
            reply = comment.findall(namespace + 'content')[0].text
            author = comment.findall(namespace + 'author')[0]
            authorName = author.findall(namespace + 'name')[0].text
            authorUri = author.findall(namespace + 'uri')[0].text
            # The date used to be read from the post, so every comment showed
            # the post's date. NOTE(review): assumes reply entries carry their
            # own 'updated' element; the post's date is the fallback if not.
            dated = comment if comment.findall(namespace + 'updated') else entry
            f.write("<a href=\"%s\">%s</a> (%s) - %s"
                    % (authorUri, smart_str(authorName), _prettyDate(dated), smart_str(reply)))
            f.write("</div>")
        f.write("</div>")
Exemplo n.º 4
0
def processFeeds(f, entries, offset, totalEntries):
    """Render each feed entry as an HTML fragment and report progress.

    For every entry this writes, in order: the post date, the post content,
    its attachments, and a "links" section holding the list of likers and
    the comment thread. After each entry a progress line is printed.

    Args:
        f: Writable file-like object; only ``f.write`` is used.
        entries: Iterable of ElementTree elements, one per post.
        offset: Number of entries already processed before this call; added
            to the running count in the progress line.
        totalEntries: Overall entry count used as the progress denominator.

    Returns:
        None. Output goes to ``f`` and to standard output.
    """
    for entryNumber, entry in enumerate(entries, 1):
        # Buzz post starts
        f.write("<div class=\"buzz\">")

        # Post date
        f.write("<div class=\"postdate\"><b>" + _prettyDate(entry) + "</b></div>")

        # Post content
        post = entry.findall(namespace + 'content')[0].text
        f.write("<div class=\"post\">" + smart_str(post) + "</div>")

        _writeAttachments(f, entry)

        # Likers and comments both hang off the entry's <link> elements.
        links = entry.findall(namespace + 'link')
        f.write("<div class=\"links\">")
        _writeLikers(f, links)
        _writeComments(f, entry, links)

        # Closes the "links" div and the enclosing "buzz" div.
        f.write("</div></div><br>")

        processed = entryNumber + offset
        # Floor division keeps the percentage an integer on Python 2 and 3;
        # the parenthesised form prints identically under both.
        print("Progress: (%d/%d) %d%%" % (processed, totalEntries,
                                          processed * 100 // totalEntries))


def _prettyDate(element):
    """Return the date of ``element``'s 'updated' child as 'DD Month YYYY '."""
    # The text is split at 'T' and then on '-', i.e. it is expected to look
    # like 'YYYY-MM-DDT...'.
    dateParts = element.findall(namespace + 'updated')[0].text.split('T')[0].split('-')
    return "%s %s %s " % (dateParts[2], month_name[int(dateParts[1])], dateParts[0])


def _writeAttachments(f, entry):
    """Write the "attachments" div with one linked block per attachment."""
    f.write("<div class=\"attachments\">")
    activityObject = entry.findall(namespace_activity + 'object')[0]
    for attachment in activityObject.findall(namespace_buzz + 'attachment'):
        titles = attachment.findall(namespace + 'title')
        title = (titles[0].text + "<br>") if titles else ""
        contents = attachment.findall(namespace + 'content')
        content = contents[0].text if contents else ""
        # No break: when several 'alternate' links exist, the last one wins.
        href = ""
        for link in attachment.findall(namespace + 'link'):
            if link.get('rel') == 'alternate':
                href = link.get('href')
        f.write("<div class=\"attachment\"><b><a href=\"%s\">%s</a></b>%s</div>"
                % (href, smart_str(title), smart_str(content)))
    # Previously written as "</div" (no '>'), which produced malformed HTML.
    f.write("</div>")


def _writeLikers(f, links):
    """Write a "likers" div for every 'liked' link with a non-zero count."""
    # Drops the first and last character of namespace_buzz (presumably the
    # braces of a '{uri}' tag prefix) to build the rel value.
    likedRel = namespace_buzz[1:-1] + '#liked'
    for link in links:
        if link.get('rel') != likedRel:
            continue
        totalLikes = link.get(namespace_buzz + 'count')
        if int(totalLikes) == 0:
            # Nothing to show, so the likers feed is not fetched at all.
            continue
        # Replaces the first five characters of the href with 'http'.
        likeFeedUrl = 'http' + link.get('href')[5:] + '&max-results=100&bhu'
        likersTree = ET.XML(readFeed(likeFeedUrl))
        f.write("<div class=\"likers\"><strong>Liked by</strong> [%s]: " % totalLikes)
        likers = likersTree.findall(namespace_likers + 'entry')
        for position, liker in enumerate(likers):
            if position:
                f.write(", ")
            likerName = liker.findall(namespace_likers + 'displayName')[0].text
            likerUri = liker.findall(namespace_likers + 'profileUrl')[0].text
            f.write("<a href=\"%s\">%s</a>" % (likerUri, smart_str(likerName)))
        f.write("</div>")


def _writeComments(f, entry, links):
    """Write a "comments" div for every 'replies' link with a non-zero count."""
    for link in links:
        if link.get('rel') != 'replies':
            continue
        replyCount = link.get(namespace_thr + 'count')
        if int(replyCount) == 0:
            continue
        f.write("<div style=\"color:#111;\" class=\"comments\"><b>Comments(%s)</b><br>" % replyCount)
        # Replaces the first five characters of the href with 'http'.
        replyFeedUrl = 'http' + link.get('href')[5:] + '&bhu'
        comments = ET.XML(readFeed(replyFeedUrl)).findall(namespace + 'entry')
        for comment in comments:
            f.write("<div style=\"margin-left:20px;\" class=\"comment\">")
            reply = comment.findall(namespace + 'content')[0].text
            author = comment.findall(namespace + 'author')[0]
            authorName = author.findall(namespace + 'name')[0].text
            authorUri = author.findall(namespace + 'uri')[0].text
            # The date used to be read from the post, so every comment showed
            # the post's date. NOTE(review): assumes reply entries carry their
            # own 'updated' element; the post's date is the fallback if not.
            dated = comment if comment.findall(namespace + 'updated') else entry
            f.write("<a href=\"%s\">%s</a> (%s) - %s"
                    % (authorUri, smart_str(authorName), _prettyDate(dated), smart_str(reply)))
            f.write("</div>")
        f.write("</div>")