def wm_txt_parser(html, url, user_id):
    """Extract one article's fields from ``html`` and hand them to storage.

    The article id is the part of ``url`` after its last ``=``.  The
    remaining fields are cut out of ``html`` with ``txt_wrap_by`` using
    fixed marker strings, passed to ``wm_save``, and the ``id`` of the
    object ``wm_save`` returns is then passed to ``wm_fav`` together with
    ``user_id``.

    NOTE(review): ``txt_wrap_by(begin, end, text)`` presumably returns the
    text found between the two markers -- confirm against its definition.
    """
    article_id = url.rpartition('=')[2]

    title = txt_wrap_by('target="_blank">', '</a></p>', html)
    source = txt_wrap_by('">来自:', '<', html)

    # The link is looked up inside the "info" paragraph only, not the
    # whole page.
    info_block = txt_wrap_by('<p class="info', '</p>', html)
    href = txt_wrap_by('href="', '"', info_block)

    like_count = txt_wrap_by('class="num-likeIt">', '人喜欢</a>', html)
    body = txt_wrap_by('<div class="content">', ' <p class="operating">', html)
    posted_at = txt_wrap_by('<span class="time">', '</span>', html)

    saved = wm_save(
        article_id, like_count, title, source, href, posted_at, body
    )
    wm_fav(user_id, saved.id)
def wm_parser(html, url):
    """Generator that walks a listing page and yields follow-up tasks.

    Yields tuples whose first element is the callback to run next:

    * ``(wm_parser, next_url)`` when the page carries a
      ``maxActionTimeInMs`` value that is positive and not already part
      of ``url``; ``next_url`` is ``url`` up to and including its last
      ``=`` followed by that value.
    * ``(wm_txt_parser, article_url, user_id)`` for every item that
      ``SpiderWm.get`` does not return a record for.

    Items for which a record is returned are passed straight to
    ``wm_fav`` instead of being yielded.

    NOTE(review): the exact return values of ``txt_wrap_by`` and
    ``txt_wrap_by_all`` on a miss are not visible here -- confirm.
    """
    user = txt_wrap_by('&u=', '&', url)
    max_time = txt_wrap_by('<li id="maxActionTimeInMs" m="', '"', html)

    if max_time:
        already_in_url = ('m=' + max_time) in url
        if not already_in_url and int(max_time) > 0:
            next_url = url[:url.rfind('=') + 1] + str(max_time)
            yield wm_parser, next_url

    # Resolved only after the pagination task above has been handed out,
    # matching the original statement order of this generator.
    user_id = wm_user_id(user)

    for chunk in txt_wrap_by_all(' itemid="', '<p class="operating">', html):
        if 'class="content"' not in chunk:
            continue

        # The chunk starts right after ``itemid="``, so the id runs up to
        # the first double quote.
        item_id = chunk[:chunk.find('"')]
        record = SpiderWm.get(wmid=item_id)
        if record is not None:
            wm_fav(user_id, record.id)
            continue

        article_url = 'http://www.wumii.com/reader/article?id=%s' % item_id
        yield wm_txt_parser, article_url, user_id
def wm_parser(html, url):
    """Parse a listing page, yielding the next crawl steps as tuples.

    First, if the ``maxActionTimeInMs`` marker in ``html`` holds a value
    that is non-empty, absent from ``url`` as ``m=<value>`` and greater
    than zero, ``(wm_parser, <url with that value after its last '='>)``
    is yielded.  Then each item chunk containing ``class="content"`` is
    looked up via ``SpiderWm.get``: a missing record yields
    ``(wm_txt_parser, <article url>, user_id)``, an existing record is
    passed to ``wm_fav``.

    NOTE(review): behaviour of ``txt_wrap_by`` / ``txt_wrap_by_all`` when
    a marker is absent is assumed, not shown in this file section.
    """
    article_url_template = 'http://www.wumii.com/reader/article?id=%s'

    account = txt_wrap_by('&u=', '&', url)
    stamp = txt_wrap_by('<li id="maxActionTimeInMs" m="', '"', html)

    # Short-circuit order matters: int() is only attempted on a non-empty
    # value that is not yet present in the URL.
    wants_next_page = bool(
        stamp and 'm=' + stamp not in url and int(stamp) > 0
    )
    if wants_next_page:
        prefix_end = url.rfind('=') + 1
        yield wm_parser, url[:prefix_end] + str(stamp)

    user_id = wm_user_id(account)

    entries = txt_wrap_by_all(' itemid="', '<p class="operating">', html)
    for entry in entries:
        if 'class="content"' in entry:
            quote_at = entry.find('"')
            wmid = entry[:quote_at]
            existing = SpiderWm.get(wmid=wmid)
            if existing is None:
                yield wm_txt_parser, article_url_template % wmid, user_id
            else:
                wm_fav(user_id, existing.id)
def wm_txt_parser(html, url, user_id):
    """Pull the article fields out of ``html``, save them, then call ``wm_fav``.

    ``url`` supplies the article id (the text after its last ``=``).
    Every other field is obtained with ``txt_wrap_by`` and a pair of fixed
    marker strings.  ``wm_save`` receives the fields in the order
    id, like, name, author, link, time, text; ``wm_fav`` then receives
    ``user_id`` and the ``id`` attribute of what ``wm_save`` returned.

    NOTE(review): assumes ``txt_wrap_by`` returns the substring between
    the given start and end markers -- verify against its definition.
    """
    wm_id = url.rsplit('=', 1)[-1]

    heading = txt_wrap_by('target="_blank">', '</a></p>', html)
    origin = txt_wrap_by('">来自:', '<', html)
    # Inner call narrows the search to the "info" paragraph before the
    # href is extracted.
    target = txt_wrap_by(
        'href="', '"', txt_wrap_by('<p class="info', '</p>', html)
    )
    likes = txt_wrap_by('class="num-likeIt">', '人喜欢</a>', html)
    content = txt_wrap_by(
        '<div class="content">', ' <p class="operating">', html
    )
    stamp = txt_wrap_by('<span class="time">', '</span>', html)

    fields = (wm_id, likes, heading, origin, target, stamp, content)
    entry = wm_save(*fields)
    wm_fav(user_id, entry.id)