def _parse_limit(raw):
    """Convert the raw ``limit`` request value to an int.

    Empty sized values (e.g. ``""``) and ``None`` yield ``0``; everything
    else is passed through ``int()``, which may raise ``ValueError`` or
    ``TypeError`` for unparsable input.
    """
    try:
        is_empty = len(raw) == 0
    except TypeError:
        # Unsized values: None means "not provided"; numbers go to int().
        is_empty = raw is None
    return 0 if is_empty else int(raw)


def _month_year_filter(field, pairs):
    """Build an ``$or`` clause matching ``field`` against each pair in ``pairs``.

    Every pair ``(m, y)`` becomes the regex ``^[0-9]{2}/<m>/<y>$``.
    """
    return {
        "$or": [
            {field: {"$regex": "^[0-9]{2}/%s/%s$" % (m, y)}}
            for m, y in pairs
        ]
    }


def searchPostHtml(request: dict):
    """Search stored post records by site, listing type and dates.

    Keys read from ``request`` (all optional):

    * ``"site"``: when it is one of the hosts in ``site_type_re`` the URL is
      restricted to that host; any other value adds no URL filter.
    * ``"type"``: a type key of the selected site (``"land"``, ``"house"``,
      ``"apartment"``); any other value (default ``"all"``) matches every
      type pattern of that site.
    * ``"crawl_date"`` / ``"post_date"``: handed to ``d_range``; the pairs it
      returns are turned into regex filters on the ``date`` / ``post_date``
      fields (presumably (month, year) pairs -- confirm against ``d_range``).
    * ``"limit"``: forwarded to ``query_html_db`` as an int; missing, ``None``
      or empty means ``0``.

    Returns:
        ``{"code": 200, "message": "successfull", "content": [...]}`` on
        success, where each post has its ``_id`` removed and its ``html``
        replaced by a placeholder string; otherwise
        ``{"code": 404, "message": "failed", "content": []}``.
        Exceptions derived from ``Exception`` are logged and reported through
        the 404 response instead of being raised.
    """
    import logging

    site_type_re = {
        "nha.chotot.com": {
            "land": r"^.*/mua-ban-dat/.*$",
            "house": r"^.*/mua-ban-nha-dat/.*$",
            "apartment": r"^.*/mua-ban-can-ho-chung-cu/.*$",
        },
        "nhadat247.com.vn": {
            "land": r"^.*nhadat247.com.vn/ban-dat.*$",
            "apartment": r"^.*nhadat247.com.vn/ban-can-ho-chung-cu.*$",
            "house": r"^.*nhadat247.com.vn/ban-nha.*$",
        },
        "batdongsan.com.vn": {
            "land": r"^.*batdongsan.com.vn/ban-dat.*$",
            "apartment": r"^.*batdongsan.com.vn/ban-can-ho-chung-cu.*$",
            "house": r"^.*batdongsan.com.vn/ban-nha.*$",
        },
    }

    try:
        db = DBObject()

        site = request.get("site")
        crawl_date = request.get("crawl_date")
        post_date = request.get("post_date")
        listing_type = request.get("type", "all")
        limit = _parse_limit(request.get("limit"))

        filters = []

        if site in site_type_re:
            type_patterns = site_type_re[site]
            filters.append({"url": {"$regex": "^https://%s/.*$" % (site)}})
            if listing_type in type_patterns:
                filters.append(
                    {"url": {"$regex": type_patterns[listing_type]}}
                )
            else:
                # Unknown type (including the default "all"): accept any of
                # the site's known listing types.
                filters.append({
                    "$or": [
                        {"url": {"$regex": pattern}}
                        for pattern in type_patterns.values()
                    ]
                })

        crawl_pairs = d_range(crawl_date)
        if len(crawl_pairs) > 0:
            filters.append(_month_year_filter("date", crawl_pairs))

        post_pairs = d_range(post_date)
        if len(post_pairs) > 0:
            filters.append(_month_year_filter("post_date", post_pairs))

        # NOTE(review): when no filter applies this sends {"$and": []}, which
        # MongoDB may reject as an empty array -- confirm how query_html_db
        # handles that case before relying on unfiltered searches.
        query_return = []
        for post in db.query_html_db(query_dict={"$and": filters},
                                     limit=limit):
            # Tolerate records lacking either key instead of failing the
            # whole search with a KeyError.
            post.pop("html", None)
            post.pop("_id", None)
            post["html"] = "content is eliminated"
            query_return.append(post)

        return {"code": 200, "message": "successfull", "content": query_return}
    except Exception:
        # Keep the best-effort 404 contract, but leave a traceback behind and
        # let SystemExit / KeyboardInterrupt propagate.
        logging.getLogger(__name__).exception("searchPostHtml failed")
        return {"code": 404, "message": "failed", "content": []}