def login_user(self, username: str, password: str, captcha: str,
               captcha_id: str, ip: str, browser: str) -> dict:
    # Verify the captcha first.
    redis = Redis().r
    correct_captcha = redis.get(captcha_id)
    if correct_captcha is None:
        return {
            "status_code": HTTPStatus.BAD_REQUEST,
            "message": "验证码已过期",  # "captcha has expired"
            "status": False
        }
    elif correct_captcha.lower() == captcha.lower():
        # Captcha is correct and single-use: expire it immediately.
        redis.expire(captcha_id, 0)
    else:
        return {
            "status_code": HTTPStatus.FORBIDDEN,
            "message": "验证码错误",  # "wrong captcha"
            "status": False
        }
    # Refuse to proceed if the account is locked.
    data = self.db["users"].find_one({"username": username}) or {}
    if data.get("status", {}).get("disable"):
        return {
            "status_code": HTTPStatus.FORBIDDEN,
            "status": False,
            "message": data.get("status", {}).get("reason")
        }

    returned_value = {"status_code": 0, "message": ""}
    if data:
        # Known username: try to log in.
        stored_password = data["password"]
        if pbkdf2_sha256.verify(password, stored_password):
            returned_value["status_code"] = HTTPStatus.OK
        else:
            returned_value["status_code"] = HTTPStatus.FORBIDDEN
            returned_value["message"] = "用户名或密码错误"  # "wrong username or password"
    else:
        # Unknown username: register it on the fly.
        hash_value = pbkdf2_sha256.hash(password)
        try:
            self.db["users"].insert_one(dict(username=username, password=hash_value,
                                             date=ts_date(), ip=ip, browser=browser))
            returned_value["status_code"] = HTTPStatus.CREATED
        except Exception as e:
            returned_value["status_code"] = HTTPStatus.INTERNAL_SERVER_ERROR
            returned_value["message"] = str(e)

    # Fall back to the request username for freshly registered users,
    # whose `data` dict is still empty at this point.
    returned_value["username"] = data.get("username", username)
    returned_value["group"] = data.get("group", ["user"])
    return returned_value
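# A minimal usage sketch for the combined login/register flow above. `user_ops`
# is a hypothetical handle to whatever class owns login_user; the Redis client is
# assumed to use decode_responses=True, matching the string comparison in the code.
import uuid
from http import HTTPStatus

def demo_login(user_ops, redis):
    captcha_id = str(uuid.uuid4())
    redis.set(captcha_id, "AB12", ex=300)  # seed the expected captcha answer
    result = user_ops.login_user(
        username="alice", password="s3cret",
        captcha="ab12",  # comparison is case-insensitive in login_user
        captcha_id=captcha_id, ip="1.2.3.4", browser="Mozilla/5.0",
    )
    # HTTPStatus.OK -> existing user logged in; HTTPStatus.CREATED -> new user registered
    return result["status_code"] in (HTTPStatus.OK, HTTPStatus.CREATED)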
def verify_email(self, username, code):
    r = Redis().r
    email = self.db["users"].find_one({"username": username})["email"]["address"]
    verify_data = r.hgetall(email)
    # Tolerate an expired/missing hash instead of raising KeyError.
    wrong_count = int(verify_data.get("wrong", 0))
    MAX = 10
    if wrong_count >= MAX:
        # Too many failures: lock the account outright.
        self.db["users"].update_one(
            {"username": username},
            {"$set": {"status": {"disable": True, "reason": "verify email crack"}}}
        )
        return {"status": False, "status_code": HTTPStatus.FORBIDDEN,
                "message": "Account locked. Please stay away"}

    correct_code = verify_data.get("code")
    if correct_code == code:
        # Invalidate the code and the resend rate limit, then mark as verified.
        r.expire(email, 0)
        r.expire(f"timeout-{email}", 0)
        self.db["users"].update_one({"username": username},
                                    {"$set": {"email.verified": True}})
        return {"status": True, "status_code": HTTPStatus.CREATED, "message": "success"}
    else:
        r.hset(email, "wrong", wrong_count + 1)
        return {"status": False, "status_code": HTTPStatus.FORBIDDEN,
                "message": f"verification code is incorrect. "
                           f"You have {MAX - wrong_count} attempts remaining"}
class SecurityHandler(web.RequestHandler):
    key = "user_blacklist"

    def __init__(self, application, request, **kwargs):
        super().__init__(application, request, **kwargs)
        self.r = Redis().r

    def prepare(self):
        if self.check_request():
            self.set_status(HTTPStatus.FORBIDDEN)
            self.finish()

    def data_received(self, chunk):
        pass

    def check_request(self):
        ban = self.__ip_check()
        user = self.__user_check()
        result = ban or user
        if result:
            self.ban()
        return result

    def get_real_ip(self):
        x_real = self.request.headers.get("X-Real-IP")
        remote_ip = self.request.remote_ip
        logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
        return x_real or remote_ip

    def ban(self):
        ip = self.get_real_ip()
        self.r.incr(ip)
        count = int(self.r.get(ip))
        # Ban rule: flat 120s for the first 10 offences, then (count - 10) * 600s.
        if count <= 10:
            ex = 120
        else:
            ex = (count - 10) * 600
        if count >= 30:
            add_cf_blacklist(ip)
        self.r.set(ip, count, ex)
        user = self.get_current_user()
        if user:
            self.r.hincrby(self.key, user)

    def get_current_user(self) -> str:
        username = self.get_secure_cookie("username") or b""
        return username.decode("utf-8")

    def __user_check(self):
        count = self.r.hget(self.key, self.get_current_user()) or 0
        return int(count) >= 20

    def __ip_check(self):
        d = self.r.get(self.get_real_ip()) or 0
        return int(d) >= 10
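# The ban rule above, restated in isolation with worked values: the offence
# counter expires after a flat 120s for the first 10 offences, then the window
# grows linearly, and 30 offences escalate to the Cloudflare blacklist.
def ban_duration(count: int) -> int:
    """Seconds the offence counter stays alive for a given offence count."""
    if count <= 10:
        return 120
    return (count - 10) * 600

assert ban_duration(1) == 120
assert ban_duration(11) == 600      # first escalation: 10 minutes
assert ban_duration(30) == 12000    # ~3.3 hours, plus add_cf_blacklist(ip)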
def get_latest_resource() -> dict:
    redis = Redis().r
    key = "latest-resource"
    latest = redis.get(key)
    if latest:
        logging.info("Cache hit for latest resource")
        latest = json.loads(latest)
        latest["data"] = latest["data"][:100]
    else:
        logging.warning("Cache miss for latest resource")
        latest = ResourceLatestMongoResource().query_db()
        redis.set(key, json.dumps(latest, ensure_ascii=False))
    return latest
def import_ban_user(self):
    usernames = self.db["users"].find({"status.disable": True},
                                      projection={"username": True})
    r = Redis().r
    r.delete("user_blacklist")
    logging.info("Importing banned users into redis... %s", usernames)
    for username in [u["username"] for u in usernames]:
        r.hset("user_blacklist", username, 100)
    r.close()
def update_user_info(self, username: str, data: dict) -> dict:
    redis = Redis().r

    valid_fields = ["email"]
    valid_data = {}
    for field in valid_fields:
        if data.get(field):
            valid_data[field] = data[field]

    # Naive sanity check: the address must contain non-space @ non-space.
    if valid_data.get("email") and not re.findall(r"\S@\S", valid_data.get("email")):
        return {
            "status_code": HTTPStatus.BAD_REQUEST,
            "status": False,
            "message": "email format error"
        }
    elif valid_data.get("email"):
        # Rate-limit resends of the verification email.
        user_email = valid_data.get("email")
        timeout_key = f"timeout-{user_email}"
        if redis.get(timeout_key):
            return {
                "status_code": HTTPStatus.TOO_MANY_REQUESTS,
                "status": False,
                "message": f"try again in {redis.ttl(timeout_key)}s"
            }

        verify_code = random.randint(10000, 99999)
        valid_data["email"] = {"verified": False, "address": user_email}
        # Send the confirmation email. Subject/body translate to:
        # "[yyets download share site] Please verify your email" / a greeting,
        # the code, a 24-hour validity note, and "ignore this mail if you did
        # not request it".
        subject = "[人人影视下载分享站] 请验证你的邮箱"
        body = f"{username} 您好,<br>请输入如下验证码完成你的邮箱认证。验证码有效期为24小时。<br>" \
               f"如果您未有此请求,请忽略此邮件。<br><br>验证码: {verify_code}"
        redis.set(timeout_key, username, ex=1800)
        redis.hset(user_email, mapping={"code": verify_code, "wrong": 0})
        redis.expire(user_email, 24 * 3600)
        send_mail(user_email, subject, body)

    self.db["users"].update_one({"username": username}, {"$set": valid_data})
    return {
        "status_code": HTTPStatus.CREATED,
        "status": True,
        "message": "success"
    }
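# Redis key layout assumed by the email-verification pair (update_user_info
# issues the code; verify_email from the earlier snippet checks it). A sketch
# of seeding the same keys by hand, e.g. in a test; `seed_verification` is a
# hypothetical helper, not part of the codebase:
def seed_verification(redis, email: str, username: str, code: int) -> None:
    redis.set(f"timeout-{email}", username, ex=1800)       # resend rate limit: 30 min
    redis.hset(email, mapping={"code": code, "wrong": 0})  # code + failed-attempt counter
    redis.expire(email, 24 * 3600)                         # code is valid for 24 hours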
class Crawler:
    def __init__(self, file, output):
        self.file = file
        self.output = output
        self.context = iter(ET.iterparse(file))
        self.database = Redis()
        # Matches [[word]] where 'word' can contain alphanumeric characters
        # along with the characters ",() -- used to extract article links.
        self.par_re = re.compile(r'\[\[([\w, \(\)"]+)\]\]')
        self.redirect_re = re.compile(r"#REDIRECT")

    def parse_article(self, text):
        if text is not None:
            return self.par_re.findall(text)
        else:
            return []

    def is_redirect(self, text):
        if text is not None:
            return self.redirect_re.findall(text)

    def crawl(self):
        event, root = next(self.context)
        self.output.write("Started")
        for event, elem in self.context:
            if elem.tag == "{http://www.mediawiki.org/xml/export-0.4/}page":
                title = elem.find("{http://www.mediawiki.org/xml/export-0.4/}title").text
                if not self.database.is_member_set("redirects", title):
                    self.database.add_set("articles", title)
                    links = self.parse_article(
                        elem.find(
                            "./{http://www.mediawiki.org/xml/export-0.4/}revision/"
                            "{http://www.mediawiki.org/xml/export-0.4/}text"
                        ).text
                    )
                    for link in links:
                        if not self.database.is_member_set("redirects", link):
                            self.database.add_set(title, link)
            elem.clear()
class ResourceMongoResource(ResourceResource, Mongo):
    redis = Redis().r

    def fansub_search(self, class_name: str, kw: str):
        class_ = globals().get(class_name)
        result = class_().search_preview(kw)
        result.pop("class")
        if result:
            return list(result.values())
        else:
            return []

    def get_resource_data(self, resource_id: int, username: str) -> dict:
        data = self.db["yyets"].find_one_and_update(
            {"data.info.id": resource_id},
            {"$inc": {"data.info.views": 1}},
            {"_id": False})
        if username:
            user_like_data = self.db["users"].find_one({"username": username})
            if user_like_data and resource_id in user_like_data.get("like", []):
                data["is_like"] = True
            else:
                data["is_like"] = False
        return data

    def search_resource(self, keyword: str) -> dict:
        projection = {"_id": False, "data.info": True}
        data = self.db["yyets"].find(
            {
                "$or": [
                    {"data.info.cnname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                    {"data.info.enname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                    {"data.info.aliasname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                ]
            },
            projection)
        data = list(data)
        returned = {}
        if data:
            returned = dict(data=data)
            returned["extra"] = []
        else:
            extra = self.fansub_search(ZimuxiaOnline.__name__, keyword) or \
                    self.fansub_search(NewzmzOnline.__name__, keyword) or \
                    self.fansub_search(ZhuixinfanOnline.__name__, keyword) or \
                    self.fansub_search(CK180Online.__name__, keyword)
            returned["data"] = []
            returned["extra"] = extra
        return returned
class ResourceMongoResource(ResourceResource, Mongo):
    redis = Redis().r

    def fansub_search(self, class_name: str, kw: str):
        class_ = globals().get(class_name)
        result = class_().search_preview(kw)
        result.pop("class")
        if result:
            return list(result.values())
        else:
            return []

    def get_resource_data(self, resource_id: int, username: str) -> dict:
        data = self.db["yyets"].find_one_and_update(
            {"data.info.id": resource_id},
            {"$inc": {"data.info.views": 1}},
            {"_id": False})
        if username:
            user_like_data = self.db["users"].find_one({"username": username})
            if user_like_data and resource_id in user_like_data.get("like", []):
                data["is_like"] = True
            else:
                data["is_like"] = False
        return data

    def search_resource(self, keyword: str) -> dict:
        final = []
        returned = {}
        projection = {"_id": False, "data.info": True}
        resource_data = self.db["yyets"].find(
            {
                "$or": [
                    {"data.info.cnname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                    {"data.info.enname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                    {"data.info.aliasname": {"$regex": f".*{keyword}.*", "$options": "-i"}},
                ]
            },
            projection)
        for item in resource_data:
            item["data"]["info"]["origin"] = "yyets"
            final.append(item["data"]["info"])

        # Search comments as well.
        r = CommentSearchMongoResource().get_comment(1, 2 ** 10, keyword)
        c_search = []
        for c in r.get("data", []):
            comment_rid = c["resource_id"]
            d = self.db["yyets"].find_one({"data.info.id": comment_rid},
                                          projection={"data.info": True})
            c_search.append(
                {
                    "username": c["username"],
                    "date": c["date"],
                    "comment": c["content"],
                    "commentID": c["id"],
                    "resourceID": comment_rid,
                    "resourceName": d["data"]["info"]["cnname"],
                    "origin": "comment"
                }
            )

        if final:
            returned = dict(data=final)
            returned["extra"] = []
        else:
            # TODO: how to generate code using ORDER here
            extra = self.fansub_search(ZimuxiaOnline.__name__, keyword) or \
                    self.fansub_search(NewzmzOnline.__name__, keyword) or \
                    self.fansub_search(ZhuixinfanOnline.__name__, keyword) or \
                    self.fansub_search(XL720.__name__, keyword) or \
                    self.fansub_search(BD2020.__name__, keyword)
            returned["data"] = []
            returned["extra"] = extra

        # Attach comment hits regardless of which branch ran.
        returned["comment"] = c_search
        return returned

    def patch_resource(self, new_data: dict):
        rid = new_data["resource_id"]
        new_data.pop("resource_id")
        old_data = self.db["yyets"].find_one({"data.info.id": rid})
        new_data["season_cn"] = self.convert_season(new_data["season_num"])
        # 1. totally empty resource: bootstrap the season list.
        if len(old_data["data"]["list"]) == 0:
            old_data["data"]["list"].append(new_data)
        else:
            # 2. append the new items to the matching season and format.
            for season in old_data["data"]["list"]:
                if new_data["season_num"] in [season["season_num"],
                                              int(season["season_num"])]:
                    user_format = new_data["formats"][0]
                    for u in new_data["items"][user_format]:
                        season["items"][user_format].append(u)
        self.db["yyets"].find_one_and_replace({"data.info.id": rid}, old_data)

    def add_resource(self, new_data: dict):
        rid = self.get_appropriate_id()
        new_data["data"]["info"]["id"] = rid
        self.db["yyets"].insert_one(new_data)
        return {"status": True, "message": "success", "id": rid}

    def delete_resource(self, data: dict):
        rid = data["resource_id"]
        meta = data.get("meta")
        if meta:
            db_data = self.db["yyets"].find_one({"data.info.id": rid})
            for season in db_data["data"]["list"]:
                for episode in season["items"].values():
                    # Iterate over a copy: removing from a list while iterating
                    # over it skips elements.
                    for v in episode[:]:
                        if v["episode"] == meta["episode"] and v["name"] == meta["name"] and \
                                v["size"] == meta["size"] and v["dateline"] == meta["dateline"]:
                            episode.remove(v)
            # Replace the pruned document.
            self.db["yyets"].find_one_and_replace({"data.info.id": rid}, db_data)
        else:
            self.db["yyets"].delete_one({"data.info.id": rid})

    def get_appropriate_id(self):
        col = self.db["yyets"]
        random_id = random.randint(50000, 80000)
        data = col.find_one({"data.info.id": random_id}, projection={"_id": True})
        if data:
            # Collision: draw again.
            return self.get_appropriate_id()
        else:
            return random_id

    @staticmethod
    def convert_season(number):
        # number: int | str
        if number in (0, "0"):
            return "正片"  # "feature film": season 0 means there are no seasons
        else:
            return f"第{number}季"  # "Season {number}"
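# The document shape patch_resource and delete_resource assume, reconstructed
# from the field accesses above; all values are illustrative only:
example_resource = {
    "data": {
        "info": {"id": 51234, "cnname": "...", "enname": "...", "views": 0},
        "list": [
            {
                "season_num": "1",
                "season_cn": "第1季",
                "items": {
                    "MP4": [
                        {"episode": "1", "name": "...", "size": "...", "dateline": "..."},
                    ],
                },
            },
        ],
    },
}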
def refresh_latest_resource(self):
    redis = Redis().r
    logging.info("Getting new resources...")
    latest = self.query_db()
    redis.set("latest-resource", json.dumps(latest, ensure_ascii=False))
    logging.info("latest-resource data refreshed.")
class GraphUtil:
    def __init__(self):
        self.database = Redis()
        self.G = None
        graph_dir = path.join(path.dirname(__file__), "graph_gexf.gexf")
        self.load_graph_gexf(graph_dir)

    def build_redis_to_adj(self, filename):
        self.G = nx.DiGraph()
        articles = self.database.get_slist_members("articles")
        for article in articles:
            links = self.database.get_slist_members(article)
            for link in links:
                if link in articles:
                    self.G.add_edge(link, article)
        nx.write_gml(self.G, filename)

    def build_adj_to_gml(self, filename, output_path):
        # output_path renamed from `path` to avoid shadowing os.path above.
        self.G = nx.read_adjlist(filename, delimiter=",",
                                 create_using=nx.DiGraph(data=True))
        nx.write_gml(self.G, output_path)

    def load_graph_gml(self, filename):
        # read_gml relabels nodes by their 'label' attribute by default,
        # matching the old relabel=True behavior.
        self.G = nx.read_gml(filename)

    def write_graph_gexf(self, filename):
        nx.write_gexf(self.G, filename)

    def load_graph_gexf(self, filename):
        self.G = nx.read_gexf(filename)

    def load_graph_adj(self, filename):
        self.G = nx.read_adjlist(filename, delimiter=",",
                                 create_using=nx.DiGraph()).reverse(copy=False)

    def degree_histogram(self):
        return nx.degree_histogram(self.G)

    def shortest_path(self, start, end):
        return nx.shortest_path(self.G, start, end)

    def path_subgraph(self, nodes):
        return self.G.subgraph(nodes)

    # Django helper functions

    def top_nodes(self, n):
        nodes = sorted(self.G.reverse(copy=True).degree(),
                       key=itemgetter(1), reverse=True)[1:n]
        tops = [i[0] for i in nodes]
        return tops

    def neighbor_subgraph(self, node, in_edges=True):
        # neighbors() returns an iterator in networkx 2+, so materialize it
        # before appending.
        if in_edges:
            neighbors = list(self.G.reverse(copy=True).neighbors(node))
        else:
            neighbors = list(self.G.neighbors(node))
        neighbors.append(node)
        if "" in neighbors:
            neighbors.remove("")
        return self.G.subgraph(neighbors)

    def create_gexf(self, nodes, filename):
        for n in nodes.nodes(data=True):
            # Color channels are 0-255 inclusive.
            n[1]["viz"] = {"color": {"r": random.randint(0, 255),
                                     "g": random.randint(0, 255),
                                     "b": random.randint(0, 255),
                                     "a": 0}}
        nx.write_gexf(nodes, filename)

    def graph_stats(self, n):
        stats = {}
        stats["Top"] = self.top_nodes(n + 1)
        pagerank = nx.pagerank(self.G)
        stats["Pagerank"] = sorted(pagerank.items(),
                                   key=itemgetter(1), reverse=True)[0:n + 1]
        stats["Articulation Points"] = list(
            nx.articulation_points(self.G.to_undirected()))
        stats["Histogram"] = self.degree_histogram()[1:26]
        return stats
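# A short usage sketch for GraphUtil, assuming graph_gexf.gexf exists next to
# the module (as __init__ requires); both article names are illustrative and
# must be node labels present in the loaded graph.
util = GraphUtil()
route = util.shortest_path("Python (programming language)", "Graph theory")
sub = util.path_subgraph(route)              # induced subgraph along the path
util.create_gexf(sub, "path_subgraph.gexf")  # colorized gexf for visualization
print(util.top_nodes(10))                    # articles with the most incoming links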