def importRawFeed(fileName, url):
    """Download the feed at ``url``, save it to ``fileName`` and return it.

    The feed is fetched with ``readFeed`` before the output file is opened,
    so a failed download does not truncate an existing file. The ``with``
    block closes the file even when the write itself raises.

    Args:
        fileName: Path of the file to create or overwrite with the raw feed.
        url: Feed location, passed unchanged to ``readFeed``.

    Returns:
        The value returned by ``readFeed(url)``, i.e. what was written to disk.
    """
    rawFeed = readFeed(url)
    with open(fileName, 'w') as f:
        f.write(rawFeed)
    # Single-argument parenthesised form prints the same text whether print is
    # a statement or a function.
    print("Raw feed saved in: %s" % fileName)
    return rawFeed
def importRawFeed(fileName, url):
    """Download the feed at ``url``, save it to ``fileName`` and return it.

    The feed is fetched with ``readFeed`` before the output file is opened,
    so a failed download does not truncate an existing file. The ``with``
    block closes the file even when the write itself raises.

    Args:
        fileName: Path of the file to create or overwrite with the raw feed.
        url: Feed location, passed unchanged to ``readFeed``.

    Returns:
        The value returned by ``readFeed(url)``, i.e. what was written to disk.
    """
    rawFeed = readFeed(url)
    with open(fileName, 'w') as f:
        f.write(rawFeed)
    # Single-argument parenthesised form prints the same text whether print is
    # a statement or a function.
    print("Raw feed saved in: %s" % fileName)
    return rawFeed
def processFeeds(f, entries, offset, totalEntries): entryNumber = 0 for entry in entries: #Buzz post starts f.write("<div class=\"buzz\">") #Process post date entryDate = entry.findall(namespace + 'updated')[0].text.split('T')[0].split('-') prettyDate = "%s %s %s " % (entryDate[2], month_name[int(entryDate[1])], entryDate[0]) f.write("<div class=\"postdate\"><b>" + prettyDate + "</b></div>") #Process post content post = entry.findall(namespace + 'content')[0].text f.write("<div class=\"post\">" + smart_str(post) + "</div>") #Process attachments f.write("<div class=\"attachments\">") attachments = entry.findall(namespace_activity + 'object')[0].findall(namespace_buzz + 'attachment') for attachment in attachments: if attachment.findall(namespace + 'title'): title = attachment.findall(namespace + 'title')[0].text + "<br>" else: title = "" if attachment.findall(namespace + 'content'): content = attachment.findall(namespace + 'content')[0].text else: content = "" href = "" links = attachment.findall(namespace + 'link') for link in links: if link.get('rel') == 'alternate': href = link.get('href') f.write("<div class=\"attachment\"><b><a href=\"%s\">%s</a></b>%s</div>" % (href, smart_str(title), smart_str(content))) f.write("</div") #Process Links postId = entry.findall(namespace + 'id')[0].text links = entry.findall(namespace + 'link') f.write("<div class=\"links\">") #Process likers feed for link in links: if link.get('rel') == (namespace_buzz[1:-1] + '#liked'): likeFeedUrl = 'http' + link.get('href')[5:] + '&max-results=100&bhu' rawLikedFeed = readFeed(likeFeedUrl) likersTree = ET.XML(rawLikedFeed) likeCounter = 0 totalLikes = link.get(namespace_buzz + 'count') if int(totalLikes) != 0: f.write("<div class=\"likers\"><strong>Liked by</strong> [%s]: " % totalLikes) likers = likersTree.findall(namespace_likers + 'entry') for liker in likers: likerName = liker.findall(namespace_likers + 'displayName')[0].text likerUri = liker.findall(namespace_likers + 
'profileUrl')[0].text f.write("<a href=\"%s\">%s</a>" % (likerUri, smart_str(likerName))) likeCounter += 1 if likeCounter != len(likers): f.write(", ") f.write("</div>") #Process comment feed for link in links: if link.get('rel') == 'replies': replyCount = link.get(namespace_thr + 'count') if int(replyCount) != 0: f.write("<div style=\"color:#111;\" class=\"comments\"><b>Comments(%s)</b><br>" % replyCount) replyFeedUrl = 'http' + link.get('href')[5:] + '&bhu' rawReplyFeed = readFeed(replyFeedUrl) comments = ET.XML(rawReplyFeed).findall(namespace + 'entry') for comment in comments: f.write("<div style=\"margin-left:20px;\" class=\"comment\">") reply = comment.findall(namespace + 'content')[0].text author = comment.findall(namespace + 'author')[0] authorName = author.findall(namespace + 'name')[0].text authorUri = author.findall(namespace + 'uri')[0].text commentDate = entry.findall(namespace + 'updated')[0].text.split('T')[0].split('-') prettyDate = "%s %s %s " % (commentDate[2], month_name[int(commentDate[1])], commentDate[0]) f.write("<a href=\"%s\">%s</a> (%s) - %s" % (authorUri, smart_str(authorName), prettyDate, smart_str(reply))) f.write("</div>") f.write("</div>") f.write("</div></div><br>") entryNumber += 1 print "Progress: (%d/%d) %d%%" % (entryNumber + offset, totalEntries, (entryNumber + offset) * 100 / totalEntries)
def processFeeds(f, entries, offset, totalEntries): entryNumber = 0 for entry in entries: #Buzz post starts f.write("<div class=\"buzz\">") #Process post date entryDate = entry.findall(namespace + 'updated')[0].text.split('T')[0].split('-') prettyDate = "%s %s %s " % (entryDate[2], month_name[int( entryDate[1])], entryDate[0]) f.write("<div class=\"postdate\"><b>" + prettyDate + "</b></div>") #Process post content post = entry.findall(namespace + 'content')[0].text f.write("<div class=\"post\">" + smart_str(post) + "</div>") #Process attachments f.write("<div class=\"attachments\">") attachments = entry.findall(namespace_activity + 'object')[0].findall(namespace_buzz + 'attachment') for attachment in attachments: if attachment.findall(namespace + 'title'): title = attachment.findall(namespace + 'title')[0].text + "<br>" else: title = "" if attachment.findall(namespace + 'content'): content = attachment.findall(namespace + 'content')[0].text else: content = "" href = "" links = attachment.findall(namespace + 'link') for link in links: if link.get('rel') == 'alternate': href = link.get('href') f.write( "<div class=\"attachment\"><b><a href=\"%s\">%s</a></b>%s</div>" % (href, smart_str(title), smart_str(content))) f.write("</div") #Process Links postId = entry.findall(namespace + 'id')[0].text links = entry.findall(namespace + 'link') f.write("<div class=\"links\">") #Process likers feed for link in links: if link.get('rel') == (namespace_buzz[1:-1] + '#liked'): likeFeedUrl = 'http' + link.get( 'href')[5:] + '&max-results=100&bhu' rawLikedFeed = readFeed(likeFeedUrl) likersTree = ET.XML(rawLikedFeed) likeCounter = 0 totalLikes = link.get(namespace_buzz + 'count') if int(totalLikes) != 0: f.write( "<div class=\"likers\"><strong>Liked by</strong> [%s]: " % totalLikes) likers = likersTree.findall(namespace_likers + 'entry') for liker in likers: likerName = liker.findall(namespace_likers + 'displayName')[0].text likerUri = liker.findall(namespace_likers + 
'profileUrl')[0].text f.write("<a href=\"%s\">%s</a>" % (likerUri, smart_str(likerName))) likeCounter += 1 if likeCounter != len(likers): f.write(", ") f.write("</div>") #Process comment feed for link in links: if link.get('rel') == 'replies': replyCount = link.get(namespace_thr + 'count') if int(replyCount) != 0: f.write( "<div style=\"color:#111;\" class=\"comments\"><b>Comments(%s)</b><br>" % replyCount) replyFeedUrl = 'http' + link.get('href')[5:] + '&bhu' rawReplyFeed = readFeed(replyFeedUrl) comments = ET.XML(rawReplyFeed).findall(namespace + 'entry') for comment in comments: f.write( "<div style=\"margin-left:20px;\" class=\"comment\">" ) reply = comment.findall(namespace + 'content')[0].text author = comment.findall(namespace + 'author')[0] authorName = author.findall(namespace + 'name')[0].text authorUri = author.findall(namespace + 'uri')[0].text commentDate = entry.findall(namespace + 'updated')[0].text.split( 'T')[0].split('-') prettyDate = "%s %s %s " % (commentDate[2], month_name[ int(commentDate[1])], commentDate[0]) f.write("<a href=\"%s\">%s</a> (%s) - %s" % (authorUri, smart_str(authorName), prettyDate, smart_str(reply))) f.write("</div>") f.write("</div>") f.write("</div></div><br>") entryNumber += 1 print "Progress: (%d/%d) %d%%" % (entryNumber + offset, totalEntries, (entryNumber + offset) * 100 / totalEntries)