Пример #1
0
def get_posts_on_page(soup, postlist):
    """Append today's posts found under the ``d_list`` element to ``postlist``.

    The element with ``id="d_list"`` is walked two levels deep: every tag
    child is treated as a list container and every tag child of that
    container as one post entry. For each entry:

    * the text of its first ``<i>`` is the post date; entries whose date text
      differs from the module-level ``today`` value are skipped;
    * its first ``<a>`` supplies the link (``href``) and the title (anchor
      text with ``*`` removed);
    * the author is the first space-delimited token of the entry's last
      non-empty direct text node, after removing ``-`` characters.

    Entries without an ``<i>`` or an ``<a>`` are skipped, and a page without
    a ``d_list`` element contributes nothing.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``find``.
        postlist: List that the created ``Post`` objects are appended to;
            it is modified in place.

    Returns:
        The same ``postlist`` object that was passed in.

    Raises:
        KeyError: If a kept entry's ``<a>`` has no ``href`` attribute.
    """
    listdiv = soup.find(id="d_list")
    if listdiv is None:
        # No post list on this page: nothing to collect.
        return postlist

    for container in listdiv:
        # Text nodes between tags have no tag name; only real tags matter.
        if container.name is None:
            continue
        for item in container:
            if item.name is None:
                continue

            date_tag = item.find("i")
            if date_tag is None or date_tag.text != today:
                continue
            anchor = item.find("a")
            if anchor is None:
                continue

            # Build the Post only once the entry is known to be kept.
            post = Post()
            post.date = unicode(date_tag.text).replace("/", "-")
            post.link = anchor["href"]
            post.title = unicode(anchor.text).replace("*", "")

            for node in item:
                # Exact type match on purpose: subclasses of NavigableString
                # are not treated as author text.
                if type(node) is not NavigableString:
                    continue
                rawstr = unicode(node).replace('-', '').strip()
                if rawstr:
                    # split() instead of index(): a name with no space after
                    # it is kept whole rather than raising ValueError.
                    post.author = rawstr.split(" ", 1)[0]

            postlist.append(post)
    return postlist