def json_index(index):
    """Return the latest reading of the device registered under ``index`` as JSON.

    Looks up the ``DeviceKey`` whose ``index`` field equals ``index``, fetches
    its newest record through ``get_latest`` and returns selected fields.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    :return: a ``jsonify`` response. It carries ``error='invalid index'`` when
        the device lookup fails and ``error='no data'`` when ``get_latest``
        fails.
    """
    print('get result from index %s' % (index,))
    try:
        query = Query(DeviceKey)
        query.equal_to('index', index)
        devicekey = query.first()
        key = devicekey.id
        lat = devicekey.get('lat')
        lng = devicekey.get('lng')
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        return jsonify(error='invalid index')
    try:
        data = get_latest(key)
    except Exception:
        return jsonify(error='no data', key=key)
    ch2o = data.get('ch2o')
    if ch2o == 65535:
        # NOTE(review): 65535 looks like a "no reading" sentinel - confirm.
        ch2o = 0
    ch2o = round(ch2o * 1.32 / 1000, 2)
    # Only the in-memory object is updated; nothing is saved here.
    data.set('ch2o', ch2o)
    tem = round(data.get('t') / 10.0 - 40.0, 1)
    return jsonify(key=key,
                   tem=tem,
                   hum=data.get('h'),
                   noise=data.get('noise'),
                   pm=data.get('pm'),
                   ch2o=ch2o,
                   createdAt=data.created_at,
                   index=index,
                   heze_rate=80,
                   tem_od=23.3,
                   hum_od=12,
                   pm_od=100,
                   longitude=lng,
                   latitude=lat)
def user_info_by_username(username, showfunc=showo_inDict):
    """Fetch a ``_User`` by username together with its ``sioeyeInfo`` and avatar.

    Every object that is present is passed to ``showfunc`` under a printed
    header.

    :param username: value matched against the ``username`` field.
    :param showfunc: callable invoked with each object that is not ``None``.
    :return: tuple ``(user, user_info, user_avatar)``; a slot is ``None`` when
        the corresponding object is missing.
    """
    query = Query("_User")
    query.include("sioeyeInfo")
    query.include("sioeyeInfo.avatar")
    query.equal_to('username', username)
    user = query.first()
    # Dereference one link at a time. The original called ``.get()`` on
    # ``user`` and ``user_info`` before its own ``is not None`` checks, so a
    # missing link raised AttributeError instead of yielding ``None``.
    user_info = user.get("sioeyeInfo") if user is not None else None
    user_avatar = user_info.get("avatar") if user_info is not None else None
    if user is not None:
        print("=== user ===")
        showfunc(user)
    if user_info is not None:
        print("=== user info ===")
        showfunc(user_info)
    if user_avatar is not None:
        print("=== user avatar===")
        showfunc(user_avatar)
    print("------------ next ------------")
    return user, user_info, user_avatar
def set_activity(uid, month, money, calories):
    """Create or update the budget ``Activity`` of ``uid`` for ``month``.

    When no ``Activity`` matches ``uid`` and ``start_time == month`` a new
    entry is created with zeroed ``curr_*`` counters and a ``duration`` of 30;
    otherwise the first matching entry is reused. In both cases the budget
    fields are set and the entry is saved.

    :param uid: value stored in / matched against the ``uid`` field.
    :param month: value stored in / matched against ``start_time``.
    :param money: new value of ``budget_money``.
    :param calories: new value of ``budget_cal``.
    """
    # The original also built an unused ``Activity()`` and re-applied both
    # constraints before ``first()``; neither had any effect.
    query = Query(Activity)
    query.equal_to("uid", uid)
    query.equal_to("start_time", month)
    if query.count() == 0:
        entry = Activity()
        entry.set("uid", uid)
        entry.set("start_time", month)
        entry.set("duration", 30)
        entry.set("curr_money", 0)
        entry.set("curr_cal", 0)
        entry.set("curr_time", 0)
        entry.set("curr_ubers", 0)
        # todo: duration, time!
    else:
        entry = query.first()
    entry.set("budget_money", money)
    entry.set("budget_cal", calories)
    entry.save()
def user_by_username(username):
    """Return the first ``_User`` whose ``username`` field equals ``username``."""
    return Query("_User").equal_to('username', username).first()
def set_activity(uid, money, calories, est_price):
    """Create or update this month's budget ``Activity`` for ``uid``.

    The month key is ``year * 100 + month`` of the current local time. The
    ride budget ``budget_ubers`` is ``money / est_price``.

    :param uid: value stored in / matched against the ``uid`` field.
    :param money: new value of ``budget_money``.
    :param calories: new value of ``budget_cal``.
    :param est_price: divisor used to derive ``budget_ubers``.
    """
    today = datetime.datetime.now()
    month = today.year * 100 + today.month
    lookup = Query(Activity)
    lookup.equal_to("uid", uid)
    lookup.equal_to("start_time", month)
    is_new = lookup.count() == 0
    ubers = money / est_price
    if is_new:
        entry = Activity()
        initial_fields = (
            ("uid", uid),
            ("start_time", month),
            ("duration", 30),
            ("budget_money", money),
            ("budget_ubers", ubers),
            ("budget_cal", calories),
            ("curr_money", 0),
            ("curr_cal", 0),
            ("curr_time", 0),
            ("curr_ubers", 0),
        )
        # todo: duration, time!
        for field, value in initial_fields:
            entry.set(field, value)
    else:
        entry = lookup.first()
        entry.set("budget_money", money)
        entry.set("budget_cal", calories)
        entry.set("budget_ubers", ubers)
    entry.save()
def consume(self):
    """Read the SQS queue and send every mail whose timer is not marked sent.

    For each record the body is used as a mail id: the matching ``Timer`` is
    looked up, and unless its ``status`` is ``'sent'`` the ``Mail`` object is
    fetched, handed to ``mailer().send`` and the timer is saved as ``'sent'``.

    :return: ``self``.
    """
    sqs_queue = SQS()
    records = sqs_queue.read()
    print('Start consuming:')
    for message in records:
        mail_id = message.get_body()
        print(mail_id)
        timer_lookup = Query(Object.extend('Timer'))
        timer_lookup.equal_to('mailId', mail_id)
        timer = timer_lookup.first()
        if timer.get('status') != 'sent':
            mail = Query(Object.extend('Mail')).get(mail_id)
            sender = mailer()
            payload = {
                'to': mail.get('to'),
                'html': mail.get('html'),
                'subject': mail.get('subject'),
            }
            sender.send(payload)
            timer.set('status', 'sent')
            timer.save()
        # NOTE(review): the original indentation was lost; the record is
        # assumed to be removed whether or not a mail was sent - confirm.
        sqs_queue.remove(message)
    return self
def run():
    """Geocode ``Business`` records that have no ``geo_point``, one per pass.

    Each pass fetches the first business whose ``geo_point`` is ``None``, asks
    ``get_coordinates`` for its position using several spellings of the
    address in turn, and saves the result as a ``GeoPoint``. When geocoding
    raises ``ValueError``/``TypeError``, or no spelling yields coordinates,
    the point ``(0, 0)`` is stored instead.

    The loop has no exit condition of its own; it ends only when a call
    raises (presumably ``query.first()`` once no business is left - confirm).
    """
    while 1:
        query = Query(Business)
        query.equal_to('geo_point', None)
        current_business = query.first()
        temp = current_business.address.values()
        current_address = " ".join(temp).replace("(", " ").replace(")", " ").replace(u'号', " ")
        try:
            lat, lon = get_coordinates(current_address)
            if lat is None or lon is None:
                print(1)
                lat, lon = get_coordinates(temp[1] + " " + temp[0] + " " + temp[2])
            if lat is None or lon is None:
                print(2)
                lat, lon = get_coordinates(temp[0] + " " + temp[1])
            if lat is None or lon is None:
                print(3)
                # NOTE(review): u'号' was already replaced by a space above, so
                # find() returns -1 here and this only drops the last
                # character - confirm the intended cut point.
                lat, lon = get_coordinates(current_address[:current_address.find(u'号')])
            if lat is None or lon is None:
                print(4)
                lat, lon = get_coordinates(temp[0] + " " + temp[2])
            if lat is None or lon is None:
                print(5)
                # The original used ``translate(None, string.digits)``, which
                # raises TypeError on a unicode string, so this fallback never
                # ran. Filtering characters works for str and unicode alike.
                digitless = "".join(ch for ch in current_address if ch not in string.digits)
                lat, lon = get_coordinates(digitless)
            if lat is None or lon is None:
                # Every spelling failed: take the same path as a geocoder
                # error rather than building a GeoPoint from None.
                raise ValueError("no coordinates found for address")
        except (ValueError, TypeError):
            lat = 0
            lon = 0
            print("ERROR: ", current_business.attributes)
            # NOTE(review): separator assumed to belong to the error report;
            # the original indentation was lost.
            print("----------------------------------------------")
        point = GeoPoint(latitude=lat, longitude=lon)
        current_business.set("geo_point", point)
        current_business.save()
def traverse_point(subject, point_type, items, parent=None, prefix=''):
    """Store a tree of knowledge-point elements as ``Point`` objects.

    For every element the ``Point`` with the same ``origin_url`` is updated,
    or a new one is created when the lookup raises ``LeanCloudError``. The
    function then recurses into the element's ``ul`` children with the saved
    point as their parent.

    :param subject: stored in the ``subject`` field of every point.
    :param point_type: stored in the ``type`` field of every point.
    :param items: iterable of elements exposing ``label.a`` and ``ul``.
    :param parent: point stored as ``parent`` when truthy.
    :param prefix: only extended with ``'--'`` and passed to the recursion.
    """
    for order, element in enumerate(items):
        leaf = element.ul is None
        url = element.label.a['href']
        fields = {
            'origin_url': url,
            'is_leaf': leaf,
            'type': point_type,
            'order': order,
            'subject': subject,
            'text': element.label.a.string
        }
        lookup = Query(Point)
        lookup.equal_to('origin_url', url)
        try:
            point = lookup.first()
        except LeanCloudError:
            point = Point(**fields)
        else:
            for name, value in fields.items():
                point.set(name, value)
        if parent:
            point.set('parent', parent)
        point.save()
        if leaf:
            continue
        traverse_point(subject, point_type, element.ul, point, '--' + prefix)
def dislike(**params):
    """Remove the ``Like`` of user ``uid`` on card ``cid`` and decrement the card.

    :param params: must contain ``cid`` (card id) and ``uid`` (user id).
    :return: dict with ``code`` and ``message``: 200/``'ok'`` on success, 400
        when no like record exists, or the ``LeanCloudError`` code and error
        text when removing the like fails.
    """
    card_id, user_id = params['cid'], params['uid']
    card = Card.create_without_data(card_id)
    user = User.create_without_data(user_id)
    like_query = Query(Like)
    like_query.equal_to('card', card)
    like_query.equal_to('user', user)
    if not like_query.count() > 0:
        return {'code': 400, 'message': '点赞记录不存在'}
    try:
        like_query.first().destroy()
        card.increment('likes', -1)
        card.fetch_when_save = True
        card.save()
    except LeanCloudError as e:
        return {'code': e.code, 'message': e.error}
    return {'code': 200, 'message': 'ok'}
def result_index(index):
    """Render ``result.html`` for the device registered under ``index``.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    :return: the rendered template, or a ``jsonify`` error response carrying
        ``'invalid index'`` (device lookup failed) or ``'invalid key'``
        (latest record could not be loaded or converted).
    """
    print('get result from index %s' % (index,))
    try:
        query = Query(DeviceKey)
        query.equal_to('index', index)
        devicekey = query.first()
        device_name = devicekey.get('name_cn')
        key = devicekey.id
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        return jsonify(error='invalid index')
    try:
        data = get_latest(key)
        ch2o = data.get('ch2o')
        if ch2o == 65535:
            # NOTE(review): 65535 looks like a "no reading" sentinel - confirm.
            ch2o = 0
        ch2o = round(ch2o * 1.32 / 1000, 2)
        # Only the in-memory object handed to the template is updated.
        data.set('ch2o', ch2o)
    except Exception:
        return jsonify(error='invalid key')
    try:
        local_time = utc2local(data.created_at)
    except Exception:
        print('local time change error')
        # Fall back to the unconverted timestamp. The original read the
        # misspelt attribute ``created_a`` here, so the fallback itself
        # raised AttributeError.
        local_time = data.created_at
    pm = min_pm(key)
    noise = min_noise(key)
    return render_template('result.html',
                           esp_test=data,
                           local_time=local_time,
                           pm=pm,
                           noise=noise,
                           index=index,
                           device_name=device_name)
def result_index(index):
    """Render ``result.html`` for the device registered under ``index``.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    :return: the rendered template, or a ``jsonify`` error response carrying
        ``'invalid index'`` (device lookup failed) or ``'invalid key'``
        (latest record could not be loaded or converted).
    """
    print('get result from index %s' % (index,))
    try:
        query = Query(DeviceKey)
        query.equal_to('index', index)
        devicekey = query.first()
        device_name = devicekey.get('name_cn')
        key = devicekey.id
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        return jsonify(error='invalid index')
    try:
        data = get_latest(key)
        ch2o = data.get('ch2o')
        if ch2o == 65535:
            # NOTE(review): 65535 looks like a "no reading" sentinel - confirm.
            ch2o = 0
        ch2o = round(ch2o * 1.32 / 1000, 2)
        # Only the in-memory object handed to the template is updated.
        data.set('ch2o', ch2o)
    except Exception:
        return jsonify(error='invalid key')
    try:
        local_time = utc2local(data.created_at)
    except Exception:
        print('local time change error')
        # Fall back to the unconverted timestamp. The original read the
        # misspelt attribute ``created_a`` here, so the fallback itself
        # raised AttributeError.
        local_time = data.created_at
    pm = min_pm(key)
    noise = min_noise(key)
    return render_template('result.html',
                           esp_test=data,
                           local_time=local_time,
                           pm=pm,
                           noise=noise,
                           index=index,
                           device_name=device_name)
def process_sj_range(item):
    """Upsert the ``Range`` record of type ``'sj'`` described by ``item``.

    ``state`` is set to 1 when an existing record already has the same
    ``page_num`` as the item, and to 0 for new records or a changed page
    number. The previous page number is stored as ``last_page_num``.

    :param item: mapping with ``code``, ``page_num`` and ``subject``.
    :return: ``item``, or ``None`` when the stripped code is empty.
    """
    code = item['code'].strip()
    if not code:
        return
    page_num = int(item['page_num'])
    fields = {
        'page_num': page_num,
        'code': code,
        'type': 'sj',
        'subject': item['subject']
    }
    lookup = Query(Range)
    lookup.equal_to('code', code)
    lookup.equal_to('type', 'sj')
    try:
        record = lookup.first()
    except LeanCloudError:
        record = Range()
        previous_page_num = 0
        record.set('state', 0)
    else:
        previous_page_num = record.get('page_num')
        if previous_page_num != page_num:
            record.set('state', 0)
        else:
            record.set('state', 1)
    for name, value in fields.items():
        record.set(name, value)
    record.set('last_page_num', previous_page_num)
    record.save()
    return item
def index():
    """Pick a random ``Photo`` as cover.

    Counts the photos with a cloud query, then fetches the photo found after
    skipping a random number of rows in descending ``createdAt`` order.
    Errors from the query propagate to the caller.
    """
    total = Query.do_cloud_query('select count(*) from Photo')
    print(total.count)
    # randint() includes its upper bound, so the original
    # ``randint(0, total.count)`` could skip every row and leave nothing to
    # fetch. The largest valid offset is ``count - 1``.
    offset = randint(0, max(total.count - 1, 0))
    query = Query(Photo).descending('createdAt').skip(offset)
    # NOTE(review): ``cover`` is not used by the visible code; the rest of the
    # function may be missing from this view.
    cover = query.first()
def xu(message):
    """Increment the shared ``life`` counter and report the new total in chat.

    :param message: incoming message; its ``from_user`` and ``chat.id`` are
        used to build and address the reply.
    """
    haha = Query(Haha).first()
    life = int(haha.get('life')) + 1
    haha.increment('life', 1)
    haha.save()
    nickname = get_nickname(message.from_user)
    reply = nickname + ' 续了 1 秒,excited! 已经续了 ' + str(life) + ' 秒了。'
    bot.sendMessage(chat_id=message.chat.id, text=reply)
def get_latest(key):
    """Return the newest ``test_esp`` record whose ``key`` field equals ``key``.

    Records are ordered by ``createdAt`` descending and the first one is
    returned. Errors raised by the query propagate to the caller unchanged.

    :param key: value matched against the ``key`` field.
    """
    # The original wrapped this in ``try/except: rasie``; the misspelt
    # ``raise`` turned every query failure into a NameError.
    query = Query(test_esp)
    query.equal_to('key', key)
    query.descending('createdAt')
    return query.first()
def is_exist(post_id):
    """Tell whether a ``Search`` record with this ``post_id`` exists.

    :param post_id: value matched against the ``post_id`` field.
    :return: ``True`` when ``first()`` succeeds, ``False`` when it raises.
    """
    query = Query(Search)
    query.equal_to('post_id', post_id)
    try:
        query.first()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit. Any query failure is still reported as "not exist".
        return False
    print('post_id exist %s' % (post_id,))
    return True
def _latest_live_by_creater_startswith_title(username, title):
    """Return the newest ``Live`` of a caster whose keyword starts with ``title``.

    :param username: username of the caster, resolved via ``user_by_username``.
    :param title: prefix matched against the ``keyword`` field.
    :return: the first matching ``Live`` in descending ``createdAt`` order;
        it is also passed to ``showo_inDict`` before being returned.
    """
    live_query = Query("Live")
    live_query.equal_to("caster", user_by_username(username))
    live_query.startswith("keyword", title)
    live_query.descending("createdAt")
    newest = live_query.first()
    showo_inDict(newest)
    return newest
def create_group(args):
    """Create or overwrite a ``DashboardGroup`` from ``args``.

    The group whose ``objectId`` equals ``args['id']`` is reused when the
    query counts at least one match; otherwise a new group is created. The
    group is cleared, every key/value of ``args`` is set on it, and it is
    saved.

    :param args: mapping of field names to values; ``id`` selects the group.
    """
    lookup = Query(DashboardGroup)
    lookup.equal_to('objectId', args.get('id'))
    if lookup.count():
        group = lookup.first()
    else:
        group = DashboardGroup()
    group.clear()
    for field, value in args.items():
        group.set(field, value)
    group.save()
def xu(message):
    """Increment the shared ``life`` counter and report the new total in chat.

    :param message: incoming message; its ``from_user`` and ``chat.id`` are
        used to build and address the reply.
    """
    haha = Query(Haha).first()
    life = int(haha.get('life')) + 1
    haha.increment('life', 1)
    haha.save()
    nickname = get_nickname(message.from_user)
    reply = nickname + ' 续了 1 秒,excited! 已经续了 ' + str(life) + ' 秒了。'
    bot.sendMessage(chat_id=message.chat.id, text=reply)
def get_latest(key):
    """Return the newest ``test_esp`` record whose ``key`` field equals ``key``.

    Records are ordered by ``createdAt`` descending and the first one is
    returned. Errors raised by the query propagate to the caller unchanged.

    :param key: value matched against the ``key`` field.
    """
    # The original wrapped this in ``try/except: rasie``; the misspelt
    # ``raise`` turned every query failure into a NameError.
    query = Query(test_esp)
    query.equal_to('key', key)
    query.descending('createdAt')
    return query.first()
def exist_file(post_ID):
    """Tell whether a ``PostShort`` record with this ``post_ID`` exists.

    :param post_ID: id of the post; converted with ``int()`` before matching.
    :return: ``True`` when ``first()`` succeeds, ``False`` when it raises.
    """
    query = Query(PostShort)
    query.equal_to("post_ID", int(post_ID))
    try:
        query.first()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        print("not find")
        return False
    print("finded %s" % (post_ID,))
    return True
def _remove_m_in_conv_id(delmem, convid):
    """Remove members from the ``m`` list of a ``_Conversation`` and save it.

    :param delmem: iterable of members to remove.
    :param convid: ``objectId`` of the conversation.
    """
    lookup = Query("_Conversation")
    lookup.equal_to("objectId", convid)
    conversation = lookup.first()
    remaining = list(set(conversation.get("m")) - set(delmem))
    conversation.set("m", list(set(remaining)))
    conversation.save()
def getModel(algo_type, model_tag, event_type):
    """Return the parameters of the newest matching ``Model``.

    :param algo_type: value matched against ``algoType``.
    :param model_tag: value matched against ``tag``.
    :param event_type: value matched against ``eventType``.
    :return: dict with ``modelParam`` (the model's ``param``) and
        ``statusSets``, taken from the first match in descending
        ``timestamp`` order.
    """
    lookup = Query(Model)
    constraints = (
        ("algoType", algo_type),
        ("tag", model_tag),
        ("eventType", event_type),
    )
    for field, wanted in constraints:
        lookup.equal_to(field, wanted)
    lookup.descending("timestamp")
    newest = lookup.first()
    return {
        "modelParam": newest.get("param"),
        "statusSets": newest.get("statusSets"),
    }
def exist_file(post_ID):
    """Tell whether a ``PostShort`` record with this ``post_ID`` exists.

    :param post_ID: id of the post; converted with ``int()`` before matching.
    :return: ``True`` when ``first()`` succeeds, ``False`` when it raises.
    """
    query = Query(PostShort)
    query.equal_to('post_ID', int(post_ID))
    try:
        query.first()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        print('not find')
        return False
    print('finded %s' % (post_ID,))
    return True
def del_schedule(uid, month, to_work):
    """Delete the first ``Schedule`` matching ``uid``, ``month`` and ``to_work``.

    Does nothing when no schedule matches.

    :param uid: value matched against the ``uid`` field.
    :param month: value matched against the ``month`` field.
    :param to_work: value matched against the ``to_work`` field.
    """
    query = Query(Schedule)
    query.equal_to("uid", uid)
    query.equal_to("month", month)
    query.equal_to("to_work", to_work)
    # The original tested ``query.count == 0`` - the bound method itself,
    # never equal to 0 - so the "nothing to delete" guard never fired.
    if query.count() == 0:
        return
    query.first().destroy()
def get_info(name):
    """Look up the ``FeedInfo`` named ``name``, preparing a new one if absent.

    A ``LeanCloudError`` with code 101 leads to a fresh, unsaved ``FeedInfo``
    carrying ``name``; any other ``LeanCloudError`` is re-raised.

    :param name: value matched against / stored in the ``name`` field.
    """
    lookup = Query(FeedInfo).equal_to('name', name)
    try:
        info = lookup.first()
    except LeanCloudError as e:
        if e.code != 101:
            raise e
        info = FeedInfo()
        info.set('name', name)
    # NOTE(review): ``info`` is neither returned nor saved by the visible
    # code; the rest of the function may be missing from this view.
def query_user_by_uid(uid):
    """Return the ``User`` whose ``uid`` field equals ``uid``.

    :param uid: value matched against the ``uid`` field.
    :return: the first matching user (an already registered user), or
        ``None`` when the lookup raises ``leancloud.LeanCloudError``.
    """
    from models import User
    lookup = Query(User)
    lookup.equal_to('uid', uid)
    try:
        return lookup.first()
    except leancloud.LeanCloudError:
        # todo
        return None
def _add_m_in_conv_id(newmem, convid):
    """Add members to the ``m`` list of a ``_Conversation`` and save it.

    :param newmem: list of members to add; concatenated to the current list.
    :param convid: ``objectId`` of the conversation.
    """
    lookup = Query("_Conversation")
    lookup.equal_to("objectId", convid)
    conversation = lookup.first()
    combined = conversation.get("m") + newmem
    # Going through a set removes repeated members.
    conversation.set("m", list(set(combined)))
    conversation.save()
def user_by_sioeyeid(id):
    """Return the user linked from the ``UserInfo`` with this ``sioeyeId``.

    :param id: value matched against the ``sioeyeId`` field.
    :return: the ``userId`` value of the first matching ``UserInfo``, or
        ``None`` when the lookup raises ``LeanCloudError`` or yields nothing.
    """
    lookup = Query("UserInfo")
    lookup.equal_to("sioeyeId", id)
    lookup.include("userId")
    try:
        info = lookup.first()
    except leancloud.errors.LeanCloudError:
        return None
    if info is None:
        return None
    return info.get("userId")
def next_cover():
    """Pick a random ``Photo`` and build its cover and download URLs.

    ``data['cover']`` and ``data['original']`` are derived from the photo's
    ``url``. A ``LeanCloudError`` with code 101 resets ``data`` to an empty
    dict; any other ``LeanCloudError`` is re-raised.
    """
    data = {}
    total = Query.do_cloud_query('select count(*) from Photo')
    try:
        # randint() includes its upper bound, so the original
        # ``randint(0, total.count)`` could skip every row and leave nothing
        # to fetch. The largest valid offset is ``count - 1``.
        offset = randint(0, max(total.count - 1, 0))
        cover = Query(Photo).descending('createdAt').skip(offset).first()
        url = cover.get('url')
        data['cover'] = url + '?imageView2/2/w/1920/interlace/1'
        data['original'] = url + '?download'
    except LeanCloudError as e:
        if e.code == 101:
            # The corresponding Class has not been created on the server yet.
            data = {}
        else:
            # A bare raise keeps the original traceback.
            raise
    # NOTE(review): ``data`` is not returned by the visible code; the rest of
    # the function may be missing from this view.
def get_user_num(page_url):
    '''
    Query the number of users on a page.

    :param page_url: value matched against the ``page_url`` field of
        ``UserInPage``.
    :return: current number of users on the page (per the original
        docstring). The visible code only returns 0, when the lookup raises.
    '''
    page_query = Query(Object.extend('UserInPage'))
    page_query.equal_to('page_url', page_url)
    try:
        # NOTE(review): ``result`` is not used by the visible code; the rest
        # of the function may be missing from this view.
        result = page_query.first()
    except Exception:
        return 0
def getModel(algo_type, model_tag, event_type):
    """Return the parameters of the newest matching ``Model``.

    :param algo_type: value matched against ``algoType``.
    :param model_tag: value matched against ``tag``.
    :param event_type: value matched against ``eventType``.
    :return: dict with ``modelParam`` (the model's ``param``) and
        ``statusSets``, taken from the first match in descending
        ``timestamp`` order.
    """
    lookup = Query(Model)
    constraints = (
        ("algoType", algo_type),
        ("tag", model_tag),
        ("eventType", event_type),
    )
    for field, wanted in constraints:
        lookup.equal_to(field, wanted)
    lookup.descending("timestamp")
    newest = lookup.first()
    return {
        "modelParam": newest.get("param"),
        "statusSets": newest.get("statusSets"),
    }
def change_name(index):
    """Handle the device rename form.

    On POST the submitted ``name_cn`` (an empty string becomes ``None``) is
    saved on the ``DeviceKey`` whose ``index`` equals ``index`` and the
    client is redirected to ``/status``. Any other method renders
    ``changename.html``.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    """
    if request.method != 'POST':
        return render_template('changename.html')
    name_cn = request.form.get('name_cn')
    if name_cn == '':
        name_cn = None
    lookup = Query(DeviceKey)
    lookup.equal_to('index', index)
    device = lookup.first()
    device.set('name_cn', name_cn)
    device.save()
    return redirect('/status')
def change_name(index):
    """Handle the device rename form.

    On POST the submitted ``name_cn`` (an empty string becomes ``None``) is
    saved on the ``DeviceKey`` whose ``index`` equals ``index`` and the
    client is redirected to ``/status``. Any other method renders
    ``changename.html``.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    """
    if request.method != 'POST':
        return render_template('changename.html')
    name_cn = request.form.get('name_cn')
    if name_cn == '':
        name_cn = None
    lookup = Query(DeviceKey)
    lookup.equal_to('index', index)
    device = lookup.first()
    device.set('name_cn', name_cn)
    device.save()
    return redirect('/status')
def insert_or_update(model, cond, data, saving=True):
    """Update the first ``model`` object matching ``cond``, or create one.

    :param model: class passed to ``Query`` and called to build a new object.
    :param cond: mapping of field name to required value.
    :param data: mapping of field name to value to set on the object.
    :param saving: when true the object is saved before being returned.
    :return: the updated or newly created object.
    """
    lookup = Query(model)
    for field, wanted in cond.items():
        lookup.equal_to(field, wanted)
    try:
        record = lookup.first()
    except LeanCloudError:
        record = model()
    for field, value in data.items():
        record.set(field, value)
    if saving:
        record.save()
    return record
def upload_file(file_set):
    """Upload files to leancloud and write uploaded file's name to uploaded.txt

    Each file is parsed as JSON and turned into a post object and a
    site-tag-post object. When no ``Post`` with the same ``ID`` can be
    fetched, both objects are saved and the file path is appended to
    ``uploaded.txt``. When one exists, its ``html`` and ``brief`` are updated
    if they differ.

    :param file_set: sized iterable of paths to JSON files.
    """
    import traceback

    print(len(file_set))
    for eachfile in file_set:
        filename = os.path.basename(eachfile)
        print('processing file %s' % (filename,))
        # Read and close straight away. The original left the handle open on
        # the "post already exists" path and whenever json.load() failed.
        with open(eachfile, 'r') as local_file:
            json_obj = json.load(local_file)
        post_obj = init_Post_obj(json_obj)
        siteTagPost_obj = init_SiteTagPost_obj(json_obj, post_obj)
        Post = Object.extend('Post')
        try:
            query = Query(Post)
            query.equal_to('ID', int(post_obj.get('ID')))
            query_obj = query.first()
        except Exception:
            # Was a bare ``except:``. A failed lookup is treated as "post not
            # uploaded yet".
            traceback.print_exc()
            try:
                post_obj.save()
                siteTagPost_obj.save()
                time.sleep(0.7)
            except Exception:
                traceback.print_exc()
                time.sleep(1)
            finally:
                # NOTE(review): this also runs when the save above failed, so
                # such files are still recorded as uploaded - confirm.
                print('%s is uploaded' % filename)
                with open('uploaded.txt', 'a+') as f:
                    f.write(eachfile + '\n')
        else:
            print("finded %s" % (query_obj,))
            time.sleep(1)
            unchanged = (
                query_obj.get('html') == post_obj.html and
                query_obj.get('brief') == post_obj.brief
            )
            if not unchanged:
                query_obj.set('html', post_obj.html)
                query_obj.set('brief', post_obj.brief)
                query_obj.save()
def get_activity(uid, month):
    """Return the budget and progress of a user's monthly ``Activity`` as JSON.

    :param uid: value matched against the ``uid`` field.
    :param month: value matched against the ``start_time`` field.
    :return: JSON string with the ``budget_*`` and ``curr_*`` fields of the
        first matching entry.
    """
    lookup = Query(Activity)
    lookup.equal_to("uid", uid)
    lookup.equal_to("start_time", month)
    activity = lookup.first()
    read = activity.get
    summary = {
        'budget_money': read("budget_money"),
        'budget_cal': read("budget_cal"),
        'curr_money': read("curr_money"),
        'curr_cal': read("curr_cal"),
        'curr_time': read("curr_time"),
        'curr_ubers': read("curr_ubers"),
    }
    return json.dumps(summary)
def upload_file(file_set):
    """Upload files to leancloud and write uploaded file's name to uploaded.txt

    Each file is parsed as JSON and turned into a post object and a
    site-tag-post object. When no ``Post`` with the same ``ID`` can be
    fetched, both objects are saved and the file path is appended to
    ``uploaded.txt``. When one exists, its ``html`` and ``brief`` are updated
    if they differ.

    :param file_set: sized iterable of paths to JSON files.
    """
    import traceback

    print(len(file_set))
    for eachfile in file_set:
        filename = os.path.basename(eachfile)
        print('processing file %s' % (filename,))
        # Read and close straight away. The original left the handle open on
        # the "post already exists" path and whenever json.load() failed.
        with open(eachfile, 'r') as local_file:
            json_obj = json.load(local_file)
        post_obj = init_Post_obj(json_obj)
        siteTagPost_obj = init_SiteTagPost_obj(json_obj, post_obj)
        Post = Object.extend('Post')
        try:
            query = Query(Post)
            query.equal_to('ID', int(post_obj.get('ID')))
            query_obj = query.first()
        except Exception:
            # Was a bare ``except:``. A failed lookup is treated as "post not
            # uploaded yet".
            traceback.print_exc()
            try:
                post_obj.save()
                siteTagPost_obj.save()
                time.sleep(0.7)
            except Exception:
                traceback.print_exc()
                time.sleep(1)
            finally:
                # NOTE(review): this also runs when the save above failed, so
                # such files are still recorded as uploaded - confirm.
                print('%s is uploaded' % filename)
                with open('uploaded.txt', 'a+') as f:
                    f.write(eachfile + '\n')
        else:
            time.sleep(1)
            unchanged = (
                query_obj.get('html') == post_obj.html and
                query_obj.get('brief') == post_obj.brief
            )
            if not unchanged:
                query_obj.set('html', post_obj.html)
                query_obj.set('brief', post_obj.brief)
                query_obj.save()
def update_nouber(uid, time, calories, money=0):
    """Add to the ``curr_*`` counters of this month's ``Activity`` for ``uid``.

    The month key is ``year * 100 + month`` of the current local time.

    :param uid: value matched against the ``uid`` field.
    :param time: amount added to ``curr_time``.
    :param calories: amount added to ``curr_cal``.
    :param money: amount added to ``curr_money`` (default 0).
    :return: ``False`` when no activity exists for this month, else ``True``.
    """
    now = datetime.datetime.now()
    month = now.year * 100 + now.month
    query = Query(Activity)
    query.equal_to("uid", uid)
    query.equal_to("start_time", month)
    if query.count() == 0:
        return False
    entry = query.first()
    entry.increment("curr_money", money)
    entry.increment("curr_time", time)
    entry.increment("curr_cal", calories)
    # The original returned without saving, unlike every other writer of
    # Activity visible here, so the increments were never persisted.
    entry.save()
    return True
def set_token(uid, token):
    """Store ``token`` on the ``User`` with this ``uid``, creating it if needed.

    :param uid: value matched against / stored in the ``uid`` field.
    :param token: new value of the ``token`` field.
    """
    query = Query(User)
    query.equal_to("uid", uid)
    if query.count() == 0:
        temp_user = User()
        temp_user.set("uid", uid)
    else:
        # The original re-applied the same ``uid`` constraint here, which had
        # no effect.
        temp_user = query.first()
    temp_user.set("token", token)
    temp_user.save()
def set_profile(uid, profile):
    """Merge ``profile`` into the stored profile of ``uid`` and save it.

    A ``Profile`` entry is created when none exists for ``uid``. Keys in
    ``profile`` overwrite keys already stored; other stored keys are kept.

    :param uid: value matched against / stored in the ``uid`` field.
    :param profile: mapping of profile keys to their new values.
    """
    query = Query(Profile)
    query.equal_to("uid", uid)
    if query.count() == 0:
        entry = Profile()
        entry.set("uid", uid)
        obj = {}
    else:
        entry = query.first()
        # An existing entry may carry no "profile" value yet; the original
        # then failed on ``None[key] = ...``.
        obj = entry.get("profile") or {}
    for key in profile:
        obj[key] = profile[key]
    entry.set("profile", obj)
    entry.save()
def update_page_info(page_url, user_id):
    '''
    Update page data.

    :param page_url: value matched against the ``page_url`` field of
        ``UserInPage``.
    :param user_id: not used by the visible code.
    :return: nothing in the visible code.
    '''
    page_query = Query(Object.extend('UserInPage'))
    page_query.equal_to('page_url', page_url)
    try:
        result = page_query.first()
    except Exception:
        # NOTE(review): ``result`` is not used by the visible code; the rest
        # of the function may be missing from this view.
        result = None
# process_question(item): upsert the Question identified by item['origin_url'].
#
# Visible behaviour:
#   * looks the question up by 'origin_url' (including 'points' when
#     item['point'] is truthy); when first() raises LeanCloudError a new
#     Question with state 0 is created and the point, if any, is added to its
#     'points' relation;
#   * removes the 'path' key from every entry of item['files'];
#   * stores either an image (small_image / big_image, taken from
#     item['files'][0]) or the content text plus content_images, depending on
#     item['has_image_content'];
#   * deletes 'content_div', 'file_urls', 'files' and 'point' from item, copies
#     the remaining item fields onto the question, forces has_image_content to
#     True when an image is stored, and saves the question.
#
# NOTE(review): this definition is collapsed onto one line and its indentation
# is lost. The clause `else: question.set('state', 1)` could belong either to
# the inner `try` (point already related) or to `if point` (no point given);
# restore the layout from the upstream source before changing this code.
# NOTE(review): as collapsed, the trailing `# for item serializable` comment
# also swallows `return item`.
def process_question(item): point = item['point'] query = Query(Question) query.equal_to('origin_url', item['origin_url']) if point: query.include('points') try: question = query.first() except LeanCloudError: question = Question() question.set('state', 0) if point: points = question.relation('points') points.add(point) else: if point: points = question.relation('points') point_query = points.query().equal_to('objectId', point.id) try: point_query.first() except LeanCloudError: points.add(point) else: question.set('state', 1) for x in item['files']: x.pop('path', None) if item['has_image_content']: if point: question.set('small_image', item['files'][0]) else: question.set('big_image', item['files'][0]) else: question.set('content', unicode(item['content_div'])) question.set('content_images', item['files']) for f in ['content_div', 'file_urls', 'files', 'point']: del item[f] for k, v in item.items(): question.set(k, v) if question.get('small_image') or question.get('big_image'): question.set('has_image_content', True) question.save() # for item serializable return item
def update_user_info(page_url, user_id, cur_time):
    '''
    Update user data and return the time of the previous update; not used yet.

    :param page_url: not used by the visible code.
    :param user_id: value matched against the ``user_id`` field of
        ``UserInfo``.
    :param cur_time: not used by the visible code.
    :return: timestamp of the user's previous update (per the original
        docstring). The visible code returns nothing.
    '''
    user_query = Query(Object.extend('UserInfo'))
    user_query.equal_to('user_id', user_id)
    try:
        result = user_query.first()
    except Exception:
        # NOTE(review): ``result`` is not used by the visible code; the rest
        # of the function may be missing from this view.
        result = None
def _get_rnnrbm_params(**conditions):
    '''
    Return the parameters of the most recently trained matching ``Rnnrbm``.

    :param conditions: e.g. {"eventType":"dining_in_restaurant","tag":"latest"};
        only ``eventType`` is used for filtering.
    :return: dict with a single ``params`` key.
    '''
    lookup = Query(Rnnrbm)
    lookup.equal_to("eventType", conditions["eventType"])
    lookup.descending("trainedAt")
    newest = lookup.first()
    print("rnnrbm %s" % (newest,))
    return {"params": newest.get("params")}
def delete_page_user(page_url, user_id):
    '''
    Remove a user from a page.

    The user is marked ``'inactive'`` in the page's ``user_data`` and
    ``user_num`` is decremented by one before the page is saved.

    :param page_url: value matched against the ``page_url`` field of
        ``UserInPage``.
    :param user_id: key of the user inside ``user_data``.
    '''
    page_query = Query(Object.extend('UserInPage'))
    page_query.equal_to('page_url', page_url)
    page = page_query.first()
    users = page.get('user_data')
    users[user_id] = 'inactive'
    page.set('user_data', users)
    page.set('user_num', page.get('user_num') - 1)
    page.save()
def test_basic_query():
    """Exercise the basic ``Query`` operations against the ``GameScore`` data.

    The assertions expect ten records with scores 0 through 9.
    """
    def scores_of(query):
        # Scores of the query's results, in result order.
        return [row.get('score') for row in query.find()]

    # find
    eq_(len(Query(GameScore).find()), 10)

    # first
    assert Query(GameScore).first()

    # get
    get_query = Query(GameScore)
    get_query.get(game_scores[0].id)

    # count
    eq_(Query(GameScore).count(), 10)

    # descending
    eq_(scores_of(Query(GameScore).descending('score')), range(9, -1, -1))

    # comparison operators
    eq_(scores_of(Query(GameScore).greater_than('score', 5).ascending('score')),
        range(6, 10))
    eq_(scores_of(Query(GameScore).greater_than_or_equal_to('score', 5).ascending('score')),
        range(5, 10))
    eq_(scores_of(Query(GameScore).less_than('score', 5).ascending('score')),
        range(0, 5))
    eq_(scores_of(Query(GameScore).less_than_or_equal_to('score', 5).ascending('score')),
        range(0, 6))

    # membership
    eq_(scores_of(Query(GameScore).contained_in('score', [1, 2, 3]).ascending('score')),
        range(1, 4))
    eq_(scores_of(Query(GameScore).not_contained_in('score', [0, 1, 2, 3]).ascending('score')),
        range(4, 10))

    # select
    selected = Query(GameScore).select('score')
    assert not selected.find()[0].has('playerName')
def get_account(self):
    """Get an unused Yitiku account.

    Fetches the first ``YTKAccount`` whose ``state`` is 0, marks it with
    ``state`` 1 and saves it.

    :return: list ``[email, password]``.
    :raises CloseSpider: when any LeanCloud call raises ``LeanCloudError``.
    """
    lookup = Query(YTKAccount)
    try:
        lookup.select('email', 'password')
        lookup.equal_to('state', 0)
        account = lookup.first()
        credentials = [account.get('email'), account.get('password')]
        account.set('state', 1)
        account.save()
    except LeanCloudError as e:
        logger.error(e)
        raise CloseSpider('leancloud cannot reach')
    return credentials
def refresh_page_info(page_url):
    '''
    Refresh the user count of a page; not used yet.

    ``user_num`` is set to the number of entries in ``user_data`` whose value
    is ``'active'`` and the page is saved.

    :param page_url: value matched against the ``page_url`` field of
        ``UserInPage``.
    '''
    page_query = Query(Object.extend('UserInPage'))
    page_query.equal_to('page_url', page_url)
    page = page_query.first()
    statuses = page.get('user_data').values()
    active_count = sum(1 for status in statuses if status == 'active')
    page.set('user_num', active_count)
    page.save()
def get_my_last_at(message):
    """Reply to the sender's most recent AT message in this chat.

    todo: relate the origin chat id.

    :param message: incoming message; its sender's username and chat id
        select the ``AtMessage``. When the lookup raises ``LeanCloudError`` a
        notice is sent as a reply to ``message`` instead.
    """
    chat_id = message.chat.id
    lookup = Query(AtMessage)
    lookup.descending('createdAt')
    lookup.equal_to('owner', message.from_user.username)
    lookup.equal_to('chat_id', chat_id)
    try:
        last_at = lookup.first()
    except LeanCloudError:
        bot.sendMessage(chat_id=message.chat.id,
                        reply_to_message_id=message.message_id,
                        text='你在本群还没有任何 AT 消息。')
        return
    bot.sendMessage(chat_id=message.chat.id,
                    reply_to_message_id=last_at.get('mid'),
                    text='Here you are.')
def alias(message):
    """Create, update or delete an ``Alias`` from a chat command.

    With one argument the alias of that key is deleted if it exists. With two
    arguments the alias is created, or its value is replaced when the key
    already exists. Any other argument count returns ``help_for_alias``.

    :param message: incoming message whose ``text`` carries the command.
    :return: the result of ``help_for_alias(message)`` for a bad argument
        count, otherwise ``None``.
    """
    _cmd, text = parse_cmd_text(message.text)
    texts = parse_text_array(text)
    if len(texts) == 0 or len(texts) > 2:
        return help_for_alias(message)
    query = Query(Alias)
    query.equal_to('key', texts[0])
    try:
        existing = query.first()
    except LeanCloudError:
        existing = None
    # ``texts`` holds one or two items from here on. The original compared
    # with ``== None`` / ``!= None``; identity checks are used instead.
    if len(texts) == 1:
        if existing is not None:
            existing.destroy()
    elif existing is None:
        created = Alias()
        created.set('key', texts[0])
        created.set('value', texts[1])
        created.save()
    else:
        existing.set('value', texts[1])
        existing.save()
    send_successful(message)
def json_index(index):
    """Return the latest reading of the device registered under ``index`` as JSON.

    Looks up the ``DeviceKey`` whose ``index`` field equals ``index``, fetches
    its newest record through ``get_latest`` and returns selected fields.

    :param index: value matched against the ``index`` field of ``DeviceKey``.
    :return: a ``jsonify`` response. It carries ``error='invalid index'`` when
        the device lookup fails and ``error='no data'`` when ``get_latest``
        fails.
    """
    print('get result from index %s' % (index,))
    try:
        query = Query(DeviceKey)
        query.equal_to('index', index)
        devicekey = query.first()
        key = devicekey.id
        lat = devicekey.get('lat')
        lng = devicekey.get('lng')
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        return jsonify(error='invalid index')
    try:
        data = get_latest(key)
    except Exception:
        return jsonify(error='no data', key=key)
    ch2o = data.get('ch2o')
    if ch2o == 65535:
        # NOTE(review): 65535 looks like a "no reading" sentinel - confirm.
        ch2o = 0
    ch2o = round(ch2o * 1.32 / 1000, 2)
    # Only the in-memory object is updated; nothing is saved here.
    data.set('ch2o', ch2o)
    tem = round(data.get('t') / 10.0 - 40.0, 1)
    return jsonify(key=key,
                   tem=tem,
                   hum=data.get('h'),
                   noise=data.get('noise'),
                   pm=data.get('pm'),
                   ch2o=ch2o,
                   createdAt=data.created_at,
                   index=index,
                   heze_rate=80,
                   tem_od=23.3,
                   hum_od=12,
                   pm_od=100,
                   longitude=lng,
                   latitude=lat)
def cancel(**params):
    """Remove the ``Like`` of user ``uid`` on card ``cid`` and decrement the card.

    :param params: must contain ``cid`` (card id) and ``uid`` (user id).
    :return: ``'ok'`` on success, ``'no'`` when no like record exists, or an
        ``HttpResponseServerError`` carrying the error text when a LeanCloud
        call fails while removing the like.
    """
    card_id, user_id = params['cid'], params['uid']
    card = Card.create_without_data(card_id)
    user = User.create_without_data(user_id)
    like_query = Query(Like)
    like_query.equal_to('card', card)
    like_query.equal_to('user', user)
    if not like_query.count() > 0:
        return 'no'
    try:
        like_query.first().destroy()
        card.increment('likes', -1)
        card.fetch_when_save = True
        card.save()
    except LeanCloudError as e:
        return HttpResponseServerError(e.error)
    return 'ok'
def _getConfig(config_name):
    """Return the ``value`` of the first ``Config`` named ``config_name``."""
    lookup = Query(Config)
    lookup.equal_to("name", config_name)
    entry = lookup.first()
    return entry.get("value")
def parse_juan_questions(subject, response):
    """Extract question items from a paper page.

    Every question block is cleaned (comments, the leading number text, error
    report / "new" / highlight markers and link wrappers are removed), its
    images are collected as absolute URLs, and a ``QuestionItem`` is built. An
    item is kept when no ``Question`` with the same ``origin_url`` can be
    fetched, or when the stored one differs in ``has_image_content`` or has
    image content. Parsing stops at the first question that is already stored
    without image content and is also image-free locally.

    :param subject: stored in the ``subject`` field of every item.
    :param response: page response offering ``xpath(...).extract()``.
    :return: list of ``QuestionItem`` objects to process.
    """
    def extract(path):
        # All strings matched by an XPath expression on the page.
        return response.xpath(path).extract()

    question_divs = extract('//*[@class="quesdiv"]/div[1]')
    origin_urls = extract('//*[@id="js_qs"]/li[2]/a/@href')
    types = extract('//*[@id="js_qs"]/input[2]/@value')
    levels = extract('//*[contains(@class, "handle")]/div/u[1]/i/text()')
    view_nums = extract('//*[contains(@class, "handle")]/div/u[2]/i/text()')
    origin_urls = clean_url(origin_urls)

    questions = []
    for i, html in enumerate(question_divs):
        soup = BeautifulSoup(html, 'lxml')
        for el in soup.find_all(text=lambda text: isinstance(text, Comment)):
            el.extract()
        num = soup.find(text=re.compile(r'\d+.'))
        if num:
            num.extract()
        unwanted = (('font', 'reportError'), ('img', 'new'), ('span', 'colf43'))
        for tag_name, css_class in unwanted:
            for el in soup.find_all(tag_name, class_=css_class):
                el.extract()
        for el in soup.find_all('a'):
            for child in el.contents:
                el.replace_with(child)

        image_urls = []
        for k, el in enumerate(soup.find_all('img')):
            lazy = el.get('lazy-src')
            url = lazy if lazy else el['src']
            if url.startswith('/'):
                url = 'http://www.yitiku.cn%s' % url
            image_urls.append(url)
            # The image is re-pointed at its position in ``image_urls``.
            el['src'] = k
            del el['lazy-src']

        item = QuestionItem(
            origin_url=origin_urls[i],
            level=levels[i],
            type=types[i],
            view_num=int(view_nums[i]),
            content_div=soup.div,
            file_urls=image_urls,
            subject=subject,
            has_image_content=is_image_content(soup),
            point=None
        )

        lookup = Query(Question)
        lookup.equal_to('origin_url', item['origin_url'])
        try:
            question = lookup.first()
        except LeanCloudError:
            questions.append(item)
        else:
            remote = question.get('has_image_content')
            local = item['has_image_content']
            if remote == local and local is False:
                break
            questions.append(item)
    return questions