Exemplo n.º 1
0
	snap_preview = doc.find_class('snap_preview')[0]
#	lastnode = snap_preview[-1]
#	if lastnode.tag == 'div' and lastnode.get('class') == 'possibly-related':
#		snap_preview.remove(lastnode)
	post.Contents = html.tostring(snap_preview)
	
#	commentstring = doc.get_element_by_id('comments').text.split()[0]
#	if commentstring == "No":
#		post.Comments = 0
#	else:
#		post.Comments = int(commentstring)
	return post

if __name__ == "__main__":
	# Script entry point: walk the sitemap and store every post that is
	# not already present in the local blog store.
	# NOTE(review): the store file is "dorait.blog" while the sitemap
	# belongs to krishashok.wordpress.com -- confirm this pairing is intended.
	blog = Blog(filepath="dorait.blog", createnew=False)
	entries = sitemap.parse_sitemap("http://krishashok.wordpress.com/sitemap.xml")
	for entry in entries:
		url = entry.loc

		# Rough filter for non-post pages: only URLs containing '/20' are
		# processed (presumably the year segment of a permalink -- verify).
		# Known to need hardening, but considered good enough for now.
		if '/20' not in url:
			continue

		# Already stored: report it and move on to the next entry.
		if blog.has_key(url):
			print("Skipped %s" % url)
			continue

		# New URL: build the post, stamp it with the sitemap's lastmod
		# value, and persist it under its URL.
		fetched = post_from_page(url)
		fetched.PostedAt = entry.lastmod
		blog.persist(url, fetched)
		print("Done %s" % url)
			 

Exemplo n.º 2
0
	snap_preview = doc.find_class('snap_preview')[0]
	lastnode = snap_preview[-1]
	if lastnode.tag == 'div' and lastnode.get('class') == 'possibly-related':
		snap_preview.remove(lastnode)
	post.Contents = html.tostring(snap_preview)
	
	commentstring = doc.get_element_by_id('comments').text.split()[0]
	if commentstring == "No":
		post.Comments = 0
	else:
		post.Comments = int(commentstring)
	return post

if __name__ == "__main__":
	# Script entry point: walk the sitemap and store every post that is
	# not already present in the local blog store.
	blog = Blog(filepath="dorait.blog", createnew=False)
	entries = sitemap.parse_sitemap("http://dorai.wordpress.com/sitemap.xml")
	for entry in entries:
		url = entry.loc

		# Rough filter for non-post pages: only URLs containing '/20' are
		# processed (presumably the year segment of a permalink -- verify).
		# Known to need hardening, but considered good enough for now.
		if '/20' not in url:
			continue

		# Already stored: report it and move on to the next entry.
		if blog.has_key(url):
			print("Skipped %s" % url)
			continue

		# New URL: build the post, stamp it with the sitemap's lastmod
		# value, and persist it under its URL.
		fetched = post_from_page(url)
		fetched.PostedAt = entry.lastmod
		blog.persist(url, fetched)
		print("Done %s" % url)
			 

Exemplo n.º 3
0
    if categories:
        post.Categories = [c.text for c in categories]

    snap_preview = doc.find_class('snap_preview')[0]
    post.Contents = html.tostring(snap_preview)
    
#    commentstring = doc.get_element_by_id('comments').text.split()[0]
#    if commentstring == "No":
#        post.Comments = 0
#    else:
#        post.Comments = int(commentstring)
    return post

if __name__ == "__main__":
    # Script entry point: walk the sitemap and store every post that is
    # not already present in the local blog store.
    blog = Blog(filepath="mona.blog", createnew=False)
    entries = sitemap.parse_sitemap("http://pixelbits.wordpress.com/sitemap.xml")
    for entry in entries:
        url = entry.loc

        # Rough filter for non-post pages: only URLs containing '/20' are
        # processed (presumably the year segment of a permalink -- verify).
        # Known to need hardening, but considered good enough for now.
        if '/20' not in url:
            continue

        # Already stored: report it and move on to the next entry.
        if blog.has_key(url):
            print("Skipped %s" % url)
            continue

        # New URL: build the post, stamp it with the sitemap's lastmod
        # value, and persist it under its URL.
        fetched = post_from_page(url)
        fetched.PostedAt = entry.lastmod
        blog.persist(url, fetched)
        print("Done %s" % url)