def gossip_list_page(page=0):
    """Render one page of the stored gossip list.

    Rows are ordered by ``Gossip.t`` and then ``Gossip.id``, both descending.

    Args:
        page: 1-based page number. Any value <= 0 (including the default)
            aborts the request with a 404.

    Returns:
        The rendered ``gossip_list.html`` template.
    """
    if page <= 0:
        abort(404)

    per_page = config.ITEMS_PER_PAGE

    # Page count is derived from the size of the whole Gossip table.
    total = Gossip.select().count()
    total_page = int(math.ceil(total * 1.0 / per_page))

    ordered = Gossip.select().order_by(Gossip.t.desc(), Gossip.id.desc())
    gossip_list = ordered.paginate(page, per_page)

    return render_template(
        "gossip_list.html",
        page=page,
        total_page=total_page,
        gossip_list=gossip_list,
    )
def load_gossip_page(page, uid=crawler.uid):
    """Fetch one page of gossip for ``uid`` and upsert every entry.

    Args:
        page: Page index sent to the gossip endpoint.
        uid: Owner of the gossip board; defaults to the crawler's own uid.

    Returns:
        Number of gossip entries contained in the fetched page.
    """
    query = {
        "id": uid,
        "page": page,
        "guest": crawler.uid,
    }
    resp = crawler.get_json(config.GOSSIP_URL, params=query, method='POST')

    for entry in resp['array']:
        if 'tinyUrl' in entry:
            head_pic = get_image(entry['tinyUrl'])
        else:
            head_pic = config.DEFAULT_HEAD_PIC

        record = {
            'id': entry['id'],
            'uid': uid,
            't': datetime.strptime(entry['time'], "%Y-%m-%d %H:%M"),
            'guestId': entry['guestId'],
            'guestName': entry['guestName'],
            # The entry carries the avatar as it was when the gossip was
            # posted, so it cannot simply be stored in the User table.
            'headPic': head_pic,
            'attachSnap': get_image(entry.get('headUrl', '')),
            'attachPic': get_image(entry.get('largeUrl', '')),
            'whisper': entry['whisper'] == 'true',
            'wap': entry['wap'] == 'true',
            'gift': entry['giftImg'] if entry['gift'] == 'true' else '',
            'content': ''
        }

        # The text shows up in several fields: body, filterdBody and
        # filterOriginBody.
        #   * filterOriginBody has not even had emoticons converted.
        #   * filterdBody has emoticons converted, but also adds an annoying
        #     <span style="color:#000000"> wrapper.
        # On top of that there is <xiaonei_wap/> for posts made from a phone
        # and <xiaonei_gift /> attached to gifts.
        text = entry['filterdBody'].replace('\n', '<br>')
        text = text.replace('<xiaonei_wap/>', '')
        if record['gift']:
            text = re.sub(r'<xiaonei_gift img="http:[\.a-z0-9/]*"/>', '', text)

        matches = normal_pattern.findall(text)
        if matches:
            record['content'] = matches[0]
        else:
            # Content stays empty; fall back to a plain message if the raw
            # body cannot be encoded by the log handler.
            try:
                logger.info(u'parse gossip body failed:\n {body}'.format(
                    body=entry["filterdBody"]))
            except UnicodeEncodeError:
                logger.info(
                    'parse gossip body failed, check origin filterBody')

        Gossip.insert(**record).on_conflict('replace').execute()

    count = len(resp["array"])
    logger.info(' crawled {count} gossip on page {page}'.format(count=count,
                                                                page=page))
    return count
def load_gossip_page(uid=crawler.uid, offset=0):
    """Fetch one batch of gossip for ``uid`` starting at ``offset`` and upsert it.

    Args:
        uid: Owner of the gossip board; defaults to the crawler's own uid.
        offset: Index of the first gossip entry to request.

    Returns:
        A ``(count, next_offset)`` tuple. ``count`` is the number of entries
        in this batch. ``next_offset`` is the offset to request next, or
        ``-1`` when there is nothing more to fetch.
    """
    r = crawler.get_json(config.GOSSIP_URL,
                         json_=get_gossip_payload(uid, offset),
                         method="POST")
    data = r["data"]
    gossip_list = data["gossipList"]

    for c in gossip_list:
        local_pic = get_image(c.get("senderHeadUrl", config.DEFAULT_HEAD_PIC))

        # The API time carries a UTC offset; round-tripping through a POSIX
        # timestamp yields a naive datetime in the local timezone.
        ts = datetime.strptime(c["time"], "%Y-%m-%dT%H:%M:%S.%f%z").timestamp()

        gossip = {
            "id": c["id"],
            "uid": uid,
            "t": datetime.fromtimestamp(ts),
            "guestId": c["sender"],
            "guestName": c["senderName"],
            # The entry carries the avatar as it was when the gossip was
            # posted, so it cannot simply be stored in the User table.
            "headPic": local_pic,
            "attachSnap": get_image(c.get("headUrl", "")),
            "attachPic": get_image(c.get("largeUrl", "")),
            "whisper": "xiaonei_only_to_me" in c["body"],
            "wap": False,  # this payload is not consulted for a wap flag
            "gift": "",  # this payload is not consulted for a gift image
            "content": "",
        }

        body = c["body"]
        # Strip the gift marker.
        body = re.sub(r'<xiaonei_gift img="http:[\.a-z0-9/]*"/>', "", body)
        # Strip the trailing whisper marker and recipient id.
        body = re.sub(r"<xiaonei_only_to_me/><Toid/>\d+$", "", body)
        gossip["content"] = body

        Gossip.insert(**gossip).on_conflict("replace").execute()

    count = len(gossip_list)
    logger.info(" crawled {count} gossip on page {page}".format(
        count=count, page=offset // 10))

    # Stop on an empty batch or once the reported total is reached or passed.
    # A strict equality check would never signal the end if the server sent
    # an empty batch early or the total shrank between requests.
    # NOTE(review): assumes data["count"] is the total number of gossip
    # entries for uid -- confirm against the API.
    if count == 0 or offset + count >= data["count"]:
        return count, -1
    return count, offset + count
def gossip_list_page(uid, page=1):
    """Render one page of the gossip stored for ``uid``.

    Rows are ordered by ``Gossip.t`` and then ``Gossip.id``, both descending.

    Args:
        uid: User whose gossip rows are listed.
        page: 1-based page number. Any value <= 0 aborts with a 404.

    Returns:
        The rendered ``gossip_list.html`` template.
    """
    if page <= 0:
        abort(404)

    per_page = config.ITEMS_PER_PAGE

    # The page count comes from the gossip total kept on g.user rather than
    # from a fresh COUNT query.
    total_page = int(math.ceil(g.user['gossip'] * 1.0 / per_page))

    query = Gossip.select().where(Gossip.uid == uid)
    query = query.order_by(Gossip.t.desc(), Gossip.id.desc())
    query = query.paginate(page, per_page)
    gossip_list = list(query.dicts())

    return render_template(
        "gossip_list.html",
        page=page,
        total_page=total_page,
        gossip_list=gossip_list,
    )
def update_fetch_info(uid):
    """Refresh the ``FetchedUser`` row for ``uid`` with current item counts.

    The user's ``User`` row is converted to a dict, extended with the number
    of Status, Gossip, Album, Photo and Blog rows for that uid, and written
    to ``FetchedUser`` (replacing any conflicting row).

    Args:
        uid: Id of the user whose summary is rebuilt.

    Returns:
        True on success.

    Raises:
        KeyError: If no ``User`` row exists for ``uid``.
    """
    from models import database, FetchedUser, User, Status, Gossip, Album, Photo, Blog

    # Key in the summary dict -> model whose rows are counted for this uid.
    counted_models = (
        ("status", Status),
        ("gossip", Gossip),
        ("album", Album),
        ("photo", Photo),
        ("blog", Blog),
    )

    with database:
        user = User.get_or_none(User.uid == uid)
        if not user:
            raise KeyError("no such user")

        fetched_info = model_to_dict(user)
        for key, model in counted_models:
            fetched_info[key] = model.select().where(model.uid == uid).count()

        FetchedUser.insert(**fetched_info).on_conflict('replace').execute()

        print('update fetched info {fetched_info}'.format(
            fetched_info=fetched_info))

    return True