def load_all_posts_visits_from_csv(bucket, csv_file_record):
    """Import per-post visits/visitors counters from a parsed CSV record
    into the bucket's visits hash db."""
    db_name = get_visits_db_name_for_bucket(bucket)
    # Once the hash grows past 5k entries, clear it first to avoid
    # unbounded accumulation of stale paths.
    if hsize(db_name) > 5000:
        hclear(db_name)
    rows = csv_file_record.get('objects') or []
    if not rows or not isinstance(rows, (list, tuple)):
        return
    # Cap at 3k rows so one oversized bucket cannot cause performance issues.
    for row in rows[:3000]:
        if not isinstance(row, dict):
            continue
        raw_path = row.get('path')
        if not raw_path or not isinstance(raw_path, string_types):
            continue
        norm_path = raw_path.strip('/').lower()
        if not is_a_markdown_file(norm_path):
            continue
        for field in ('visits', 'visitors'):
            count = to_int(row.get(field), default_if_fail=0)
            hset(db_name, get_visits_key(norm_path, field=field), count)
def invite():
    """Admin page: list invitation codes and optionally create new ones."""
    bucket = get_logined_admin_bucket()
    if not bucket:
        abort(404, "not admin")
    cursor = request.values.get("cursor", "")
    if request.values.get("action") == "create":
        # create the requested number of invitation codes (default 1)
        count = to_int(request.values.get("number"), default_if_fail=1)
        create_invitations(limit=count)
    per_page = to_int(request.values.get("per_page"), default_if_fail=100)
    codes = get_invitations(limit=per_page, start_code=cursor)
    return render_api_template_as_response('page_admin_invite.jade',
                                           invitations=codes)
def decrease_file_size_for_bucket(bucket, file_size):
    """Subtract `file_size` bytes from the bucket's file-size usage counter."""
    delta = to_int(file_size, default_if_fail=0)
    if not delta:
        return
    # Apply a negative increment regardless of the sign passed in
    # (equivalent to the original "negate if positive" logic).
    zincr("_bucket_usage_file_size", bucket, -abs(delta))
def change_bucket_expired_date(bucket, expired_date=None, days=None, order_id=None, **kwargs):
    """Set a bucket's service expiry date.

    - Neither `expired_date` nor `days` given: expire `free_days` (env,
      default 30) days from now.
    - `days` given as int/float: extend the current expiry (or now, if
      already expired) by that many days.
    - `expired_date` given as a non-datetime: parsed via utc_date_parse;
      an unparseable value is a silent no-op (no update is written).
    """
    if expired_date is None and days is None:
        # by default, 30 days
        free_days = to_int(get_env("free_days"), default_if_fail=30) or 30
        expired_date = datetime.datetime.utcnow() + datetime.timedelta(
            days=free_days)
    elif isinstance(days, (int, float)):
        # extend by the specified number of days
        now = datetime.datetime.utcnow()
        current_expired_date = get_bucket_expired_date(bucket) or now
        if current_expired_date < now:
            current_expired_date = now
        expired_date = current_expired_date + datetime.timedelta(days=days)
    if not isinstance(expired_date, datetime.datetime):
        try:
            expired_date = utc_date_parse(expired_date)
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit /
            # KeyboardInterrupt; parse failures still silently no-op.
            return
    set_bucket_service_info(bucket, expired_date=expired_date,
                            order_id=order_id, **kwargs)
def get_bucket_usage(bucket):
    """Collect a bucket's usage stats: monthly requests/bandwidth series,
    human-readable file size, and per-type record counts."""
    raw_result = hgetall("_bucket_usage_%s" % bucket)
    usage = {"requests": [], "bandwidth": [], "file_size": "", "docs_count": 0}
    if not bucket:
        return usage
    usage["file_size"] = bytes2human(get_bucket_file_size(bucket))
    usage["docs_count"] = get_records_count(bucket)
    for data_type in ("post", "file", "image", "folder"):
        usage["%ss_count" % data_type] = count_records_by_type_for_bucket(
            bucket, data_type)
    raw_result.reverse()
    for key, raw_value in raw_result:
        value = to_int(raw_value, default_if_fail=0)
        if not value:
            continue
        month = key[:-1]  # key is "<YYYYMM>r" or "<YYYYMM>b"
        if key.endswith("r"):  # requests
            usage["requests"].append(dict(month=month, value=value))
        elif key.endswith("b"):  # bandwidth
            usage["bandwidth"].append(dict(month=month,
                                           value=bytes2human(value)))
    return usage
def resize(obj, *args, **kwargs):
    # compatibility for Bitcron
    """Append a medium-size hint for widths in (300, 800]; otherwise
    return `obj` unchanged."""
    width = to_int(kwargs.get("width", None), default_if_fail=None)
    if width is not None and 300 < width <= 800:
        return obj + "?size=m"
    return obj
def __init__(
        self,
        bucket,
        data_type,
        per_page=3,
        page=1,
        path=None,
        level_start=None,
        level_end=None,
        sort_by=None,
        ignore_marked_id=False,
        prefix_to_ignore=None,
        date_start=None,
        date_end=None,
):
    """Paginator over one bucket's ordered records of a given data type."""
    BasePaginator.__init__(self)
    self.bucket = bucket  # record bucket
    self.data_type = data_type
    self.z_bucket = get_order_bucket_name(bucket, data_type)  # for order
    self.path_bucket = get_bucket_name_for_path(bucket)
    self.level_start = level_start
    self.level_end = level_end
    self.sort_by = sort_by
    self.date_start = date_start
    self.date_end = date_end
    self.prefix_to_ignore = prefix_to_ignore
    page = to_int(page, 1, 10000)  # at most 10k pages
    per_page = to_int(per_page, 3, 1000)  # at most 1k records per page
    if page * per_page > 100000:  # total addressable records capped at 100k
        abort(404)
    self.path = (path or '').lower().strip('/')  # prefix
    self.per_page = per_page
    self.total_pages = int(ceil(float(self.total_count) / per_page))
    self.pages = self.total_pages  # total number of pages
    self.page = page
    self.next_page = page + 1
    self.previous_page = page - 1
    self.default_max_page_numbers = 10
    self.ignore_marked_id = ignore_marked_id
def increase_request_for_bucket(bucket, num=1):
    """Add `num` requests to the bucket's counters: a global ranking zset
    plus a per-bucket monthly field ("<YYYYMM>r")."""
    num = to_int(num, default_if_fail=0)
    if not num:
        return
    now = datetime.datetime.utcnow()
    key = now.strftime("%Y%m") + "r"
    # Fix: the global zset was incremented by a hard-coded 1 even when
    # num != 1, diverging from the monthly hash below and from
    # increase_bandwidth_for_bucket, which passes the amount to both.
    zincr("_bucket_usage_request", bucket, num)
    hincr("_bucket_usage_%s" % bucket, key, num)
def set_status_code(self, code):
    """Template helper: record an HTTP status code for the response.

    Unknown codes are ignored; always renders as an empty string.
    """
    code = to_int(code) or 200
    allowed_codes = (
        200, 201, 202, 203, 204, 205, 206,
        300, 301, 302, 303, 304, 305, 306, 307,
        400, 401, 402, 403, 404, 405, 406, 408, 409, 410,
    )
    if code in allowed_codes:
        set_response_code_in_request(code)
        self.__dict__['code'] = code
    return ''
def get_all_buckets_request(score_start=0, per_page=1000):
    """Return [{bucket, value}, ...] scanned from the request-usage zset,
    highest scores first."""
    raw_result = zrscan("_bucket_usage_request",
                        score_start=score_start or "",
                        limit=per_page)
    return [dict(bucket=bucket, value=to_int(value, default_if_fail=0))
            for bucket, value in raw_result]
def increase_bandwidth_for_bucket(bucket, bandwidth):
    """Add `bandwidth` bytes to the bucket's counters: a global ranking
    zset plus a per-bucket monthly field ("<YYYYMM>b")."""
    bandwidth = to_int(bandwidth, default_if_fail=0)
    if not bandwidth:
        return
    month_key = datetime.datetime.utcnow().strftime("%Y%m") + "b"
    zincr("_bucket_usage_bandwidth", bucket, bandwidth)  # all buckets bandwidth
    hincr("_bucket_usage_%s" % bucket, month_key, bandwidth)
def get_all_posts_visits(bucket):
    """Return {path: {'visits': n, 'visitors': m}} for the whole bucket.

    Hash keys look like "<field>/<path>"; keys without '/' are skipped.
    """
    raw_result = hgetall(get_visits_db_name_for_bucket(bucket))
    visits_data = {}
    for key, value in raw_result:
        if '/' not in key:
            continue
        field, raw_path = key.split('/', 1)
        normalized = raw_path.strip('/').lower()
        visits_data.setdefault(normalized, {})[field] = to_int(
            value, default_if_fail=0)
    return visits_data
def show_records(self):
    """JSON API: list this bucket's records, cursor-paginated."""
    if not self.bucket:
        return jsonify([])
    per_page = to_int(self.raw_json_data.get("per_page")) or 10
    return show_bucket_records_for_web_request(
        self.bucket,
        cursor=self.raw_json_data.get("cursor"),
        per_page=per_page,
        includes_zero_ids=False)
def get_post_visits_count(doc, field='visits'):
    """Return the stored counter for a post doc.

    `field` is one of 'visits' / 'visitors'; returns 0 when no bucket is
    bound to the request context or the doc has no path.
    """
    bucket = get_bucket_in_request_context()
    if not bucket:
        return 0
    visits_db_name = get_visits_db_name_for_bucket(bucket)
    doc_path = doc.get('path')
    if not doc_path:
        return 0
    raw_count = hget(visits_db_name, get_visits_key(doc_path, field=field)) or 0
    return to_int(raw_count, default_if_fail=0)
def search(self, keywords=None, limit=30, sort='-date'):
    """Full-text search returning a post list.

    `keywords` may be a string or a list/tuple of strings; a list that
    cannot be joined (non-string items) yields [].
    """
    if not keywords:
        return []
    limit = to_int(limit, 30)
    if isinstance(keywords, (tuple, list)):
        try:
            keywords = ' '.join(keywords)
        except TypeError:
            # Fix: was a bare `except:`; str.join only raises TypeError
            # here (non-string items), so catch exactly that.
            return []
    if not keywords:
        return []
    pager_name = 'search_posts'
    return self.data_namespace.get_data(type='post', keywords=keywords,
                                        pager_name=pager_name,
                                        min_limit=self.min_per_page,
                                        limit=limit, sort=sort)
def get_grid_factor(k, base=24, odd_base=5):
    # There are generally two grid bases; e.g. 1/5 cannot map onto base 24.
    """Normalize a grid-width spec into (column_count, base).

    `k` may be a ratio string like '1-3' or '1/5', a decimal string,
    an integer (or integer string), or a numeric value <= 1 that is
    already a fraction of the base. Returns an int column count clamped
    to [1, base]; `base` switches to `odd_base` when the ratio's
    denominator equals it.
    NOTE(review): depends on `special_grid_factors`, `to_int`, `to_float`
    defined elsewhere; `unicode` implies py2-compatible code.
    """
    k = special_grid_factors.get(k) or k
    if not k:
        return k, base
    if isinstance(k, (str, unicode)):
        if '-' in k or '/' in k:
            v1, v2 = re.split(r'[-/]', k, maxsplit=1)
            v1 = to_int(v1)
            v2 = to_int(v2)
            if v1 and v2:
                small_one = min(v1, v2)
                big_one = max(v1, v2)
                if big_one == odd_base:
                    # denominator matches the odd base -> use that base
                    base = odd_base
                k = float(small_one) / float(big_one)
        elif '.' in k:
            k = to_float(k)
        else:  # plain integer string
            k = to_int(k) or 1
    if k and isinstance(k, int) and k > 1:
        k = float(k) / base
    if isinstance(k, (float, int)) and k <= 1:
        # everything above was reduced to a fraction; 1 means full width
        k *= base
    # finalize k
    k = to_int(round(k)) or base  # round, then int
    if k > base:
        k = base
    if k < 1:
        k = 1
    return k, base
def auto_sidebar(self, side='left', sidebar_name='sidebar', width=300, default_status="hide"):
    # for markdown
    """Render the auto-sidebar helper template to HTML; invalid `side`
    values fall back to 'left'."""
    if side not in ('left', 'right'):
        side = 'left'
    return render_api_template('auto_sidebar',
                               sidebar_name=sidebar_name,
                               side=side,
                               sidebar_width=to_int(width, 300),
                               default_status=default_status,
                               return_html=True)
def check_signature_for_bucket(bucket, signature, salt=None, hours=24):
    """Validate a timestamped signature of the form '<ts>-<body>'.

    Rejects non-strings, malformed values, and signatures older than
    `hours` (24h, i.e. one day, by default).
    """
    if not isinstance(signature, string_types):
        return False
    if signature.count("-") != 1:
        return False
    raw_ts, _body = signature.split("-", 1)
    signature_timestamp = to_int(raw_ts, default_if_fail=0)
    if not signature_timestamp:
        return False
    age = int(time.time()) - signature_timestamp
    if age > hours * 60 * 60:
        return False
    expected = get_signature_for_bucket(bucket, signature_timestamp, salt=salt)
    return signature == expected
def level_to_level_start_and_end(level, path):
    """Translate a level spec into absolute (level_start, level_end).

    `level` may be a [lo, hi] pair, an int, or a string like '>2' / '<1';
    values are relative to `path` (the root counts as -1 slashes).
    """
    if not level:
        return None, None
    level_start = level_end = None
    base = path.strip('/').count('/') if path else -1  # root treated as -1
    if type(level) in [list, tuple] and len(level) == 2 and level[1] > level[0]:
        level_start = base + level[0]
        level_end = base + level[1]
    elif type(level) == int:
        # a single directory level
        level_start = level_end = base + level
    elif isinstance(level, string_types):
        current = base + to_int(level.strip('<>'), 0)
        if level.startswith('>'):
            level_start = current + 1
        elif level.startswith('<'):
            # '<1' is equivalent to 0
            level_end = current
    return level_start, level_end
def append_to_markdown_record(bucket, relative_path, content_to_append, lines_to_append=1, more_line_when_seconds_passed=0, position='tail'):
    """Append (or prepend, with position != 'tail') text to an existing
    markdown post record, then sync it server-side.

    Returns 'ignore' when nothing is done (record is not a post, path is
    not markdown, or the content is a duplicate); otherwise returns
    sync_file_by_server_side's error info.
    NOTE(review): `to_int(..., max_value=10)` presumably caps the blank
    lines at 10 — confirm against to_int's definition.
    """
    record = get_record_by_path(bucket, path=relative_path) or {}
    record_type = record.get('_type') or record.get('type')
    if record_type != 'post':
        return 'ignore'  # ignore
    if not is_a_markdown_file(relative_path):
        return 'ignore'
    old_content = record.get('raw_content') or record.get('content') or ''
    old_content = smart_unicode(old_content)
    now = time.time()
    old_timestamp = record.get('timestamp')
    if more_line_when_seconds_passed and old_timestamp and isinstance(
            old_timestamp, (int, float)):
        # After enough seconds have passed, add one extra blank line,
        # effectively starting a new "paragraph".
        diff = now - old_timestamp
        if diff > more_line_when_seconds_passed:
            lines_to_append += 1
    interval_empty_lines = '\r\n' * abs(to_int(lines_to_append, max_value=10))  # blank-line separator
    content_to_append = smart_unicode(content_to_append).strip()
    if old_content.endswith(
            '\n' + content_to_append) or old_content == content_to_append:
        return 'ignore'  # ignore, duplicated content is not processed
    if position == 'tail':  # append at the end by default
        new_content = '%s%s' % (interval_empty_lines, content_to_append)
        content = '%s%s' % (old_content, new_content)
    else:
        new_content = '%s%s' % (content_to_append, interval_empty_lines)
        content = '%s%s' % (new_content, old_content)
    error_info = sync_file_by_server_side(bucket=bucket, relative_path=relative_path, content=content)
    return error_info
def does_hit_level(record, level, path_prefix):
    """Check whether record['path'] sits at the directory level(s)
    described by `level`, counted relative to `path_prefix`."""
    path = record.get('path')
    base_depth = path_prefix.strip('/').count('/') if path_prefix else -1
    record_depth = path.strip('/').count('/') if path else -1
    current_level = record_depth - base_depth
    if type(level) in [list, tuple] and len(level) == 2 and level[1] > level[0]:
        return level[0] <= current_level <= level[1]
    if type(level) == int:
        # a single directory level
        return current_level == level
    if isinstance(level, string_types):
        int_level = to_int(level.strip('<>'), None)
        if int_level:
            if level.startswith('>') and current_level > int_level:
                return True
            if level.startswith('<') and current_level < int_level:
                return True
    return False  # at last
def _is_from_wechat(token, ttl=120):
    """Verify that the current request carries a valid WeChat signature.

    `token` is the value configured in the WeChat admin console. With a
    truthy `ttl`, requests with a timestamp older than `ttl` seconds
    (2 minutes by default) fail verification.
    """
    rv = request.values
    nonce = rv.get('nonce')
    timestamp = rv.get('timestamp')
    if not nonce or not timestamp:
        return False
    values_to_sign = [token, nonce, timestamp]
    values_to_sign.sort()
    to_sign = ''.join(values_to_sign)
    signature = get_sha1(to_sign)
    if rv.get('signature') != signature:
        return False
    if ttl:
        timestamp = to_int(timestamp)
        if not timestamp:
            # Fix: a non-numeric timestamp made to_int return a non-int,
            # crashing the subtraction below with TypeError; treat it as
            # a failed verification instead.
            return False
        now = time.time()
        if (now - timestamp) > ttl:
            # request expired
            return False
    return True  # at last
def get_folder_children_count(bucket, folder_path, field='posts', direct=False, files_info=None):
    # posts & images
    """Count a folder's children of the given kind ('posts' or 'images');
    `direct=True` counts immediate children only (underscore-prefixed
    counter keys)."""
    if not bucket and not files_info:
        return 0
    if folder_path is None:
        return 0
    folder_path = folder_path.strip('/').lower()
    if files_info is None:
        files_info = get_bucket_files_info(bucket) or {}
    if not isinstance(files_info, dict):
        files_info = {}
    counts_by_folder = files_info.get('lower_folders_count') or {}
    folder_counts = counts_by_folder.get(folder_path) or {}
    count_key = '%s_count' % field
    if direct:
        count_key = '_%s' % count_key
    return to_int(folder_counts.get(count_key, 0), default_if_fail=0)
def get_recent(self, limit=8):
    """Return up to `limit` (capped at 100) most recent post docs."""
    limit = min(100, to_int(limit, 8))
    # Take twice as many paths: if many of them are non-public the
    # result could otherwise come up short.
    candidate_paths = self._post_paths[:limit * 2]
    return get_records_by_paths(self.bucket, candidate_paths,
                                ignore_marked_id=True, limit=limit)
def __getattr__(self, item):
    """Dynamic attribute hook: `recent_N` / `recentN` returns the N most
    recent posts via self.get_recent(N).

    NOTE: names that do not match fall through and yield None implicitly
    (original behavior preserved — no AttributeError is raised).
    """
    # Fix: patterns were non-raw strings ('\d'), an invalid escape
    # sequence warning in py3; a capture group also removes the second
    # re.search pass.
    matched = re.match(r'^recent_?(\d+)$', item)
    if matched:
        return self.get_recent(to_int(matched.group(1)))
def int(self):
    """Template filter: coerce the wrapped core value to an int
    (e.g. '123'.int)."""
    return to_int(self.core)
# --- module-level configuration & client setup ---
# NOTE(review): `ssdb_ip`, `ssdb_port`, `version`, `get_env`, `to_int`
# and `SSDB_Client` are defined/imported earlier in the file.
try:
    ssdb_port = int(ssdb_port)
except:
    ssdb_port = 8888  # fall back to the SSDB default port
db_client = SSDB_Client(ssdb_ip, ssdb_port)
STATIC_FILE_VERSION = version  # version suffix applied when static assets load via h.load
# Markdown parsing can inflate content: a 100k (multi-line) document may
# expand to roughly 500k.
MAX_RECORD_SIZE = 500 * 1024  # 500Kb
# Only enforced during `verify`; records written directly by the system
# are not subject to this limit.
MAX_RECORD_SIZE_FOR_CONFIG = 800 * 1024  ## 800Kb
MAX_FILE_SIZE = to_int(
    get_env("MAX_FILE_SIZE")) or 50 * 1024 * 1024  # max file size, 50 Mb
# Negative: no invite permission; 0: open state; ADMIN_BUCKET always has
# invite permission.
# Positive: a bucket may invite others only this many days after creation.
# Default is -1, i.e. a bucket can only be created by invitation.
try:
    ALLOWED_INVITE_DAYS = float((get_env('ALLOWED_INVITE_DAYS') or '').strip())
except:
    ALLOWED_INVITE_DAYS = -1
ADMIN_BUCKET = get_env('ADMIN_BUCKET') or ''
DEBUG = bool(get_env('DEBUG'))
WEBSOCKET = bool(get_env('WEBSOCKET'))
def after(self, seconds):
    """Return a new Date shifted forward by `seconds` (may be negative)."""
    offset = datetime.timedelta(seconds=to_int(seconds, 0))
    return Date(self.core + offset)
def get_files_info(bucket):
    """Build the bucket's files-info snapshot from the path hash.

    Returns a dict with: 'files' / 'folders' (per-path metadata dicts of
    record_id, size, version), 'lower_files' (lowercased keys),
    'lower_folders' (a list of lowercased folder paths),
    'lower_folders_count' (per-folder posts/images counters; the
    underscore-prefixed keys count direct children only) and 'date'.
    """
    data = {}
    path_bucket = get_bucket_name_for_path(bucket)
    data['files'] = {}
    data['folders'] = {}
    data['lower_files'] = {}
    data['lower_folders'] = []  # not a dict
    lower_folders = []
    lower_folders_count = {}
    records = hscan(path_bucket, key_start='', limit=20000)
    for filepath, filepath_data_string in records:
        if filepath.startswith('_'):
            # underscore-prefixed paths are treated as hidden
            continue
        lower_filepath = filepath.strip().lower()
        # prepare raw data starts
        raw_filepath_data = filepath_data_string.split(',')
        if len(raw_filepath_data) != 3:
            # malformed value; expected "record_id,size,version"
            continue
        filepath_data_keys = ['record_id', 'size', 'version']
        filepath_data = dict(zip(filepath_data_keys, raw_filepath_data))
        filepath_data['size'] = to_int(filepath_data['size'], default_if_fail=0)
        if filepath_data.get('version') == 'folder':
            #is_dir = True
            # flags set False so the shared counting code below still works
            is_image = False
            is_markdown = False
            data['folders'][filepath] = filepath_data
            if lower_filepath not in lower_folders:
                lower_folders.append(lower_filepath)
        else:
            #is_dir = False
            # prepare raw data ends
            is_image = is_a_image_file(filepath)
            is_markdown = is_a_markdown_file(filepath)
            data['files'][filepath] = filepath_data
            data['lower_files'][filepath.strip().lower()] = filepath_data
        lower_folder_path = os.path.split(filepath.strip().lower())[0]
        if lower_folder_path:
            parts = lower_folder_path.split('/')
            parts_length = len(parts)
            if parts_length > 10:
                # skip absurdly deep paths
                continue
            # register every ancestor folder and bump its counters
            for i in range(parts_length):
                one_lower_folder_path = '/'.join(parts[:i + 1])
                last_path_part = one_lower_folder_path.split('/')[-1]
                if last_path_part.startswith('_'):
                    continue
                if one_lower_folder_path not in lower_folders:
                    lower_folders.append(one_lower_folder_path)
                if one_lower_folder_path:
                    images_count_plus = 1 if is_image else 0
                    posts_count_plus = 1 if is_markdown else 0
                    # underscore-prefixed counters: only the direct parent
                    _images_count_plus = 1 if images_count_plus and lower_folder_path == one_lower_folder_path else 0
                    _posts_count_plus = 1 if posts_count_plus and lower_folder_path == one_lower_folder_path else 0
                    matched_count = lower_folders_count.setdefault(one_lower_folder_path, {})
                    matched_count['images_count'] = matched_count.get('images_count', 0) + images_count_plus
                    matched_count['posts_count'] = matched_count.get('posts_count', 0) + posts_count_plus
                    matched_count['_images_count'] = matched_count.get('_images_count', 0) + _images_count_plus
                    matched_count['_posts_count'] = matched_count.get('_posts_count', 0) + _posts_count_plus
    data['lower_folders'] = lower_folders
    data['lower_folders_count'] = lower_folders_count
    data['date'] = time.time()
    return data
def append_to_markdown_doc_and_sync(bucket, path, content, lines_to_append=1, reverse=False, do_not_repeat=True, lines_more_diff=None, draft_by_default=False):
    """Append (or prepend, with reverse=True) `content` to a markdown doc
    and sync it server-side; creates the doc if it does not exist.

    By default (`do_not_repeat`) duplicated content is skipped and ''
    is returned. `lines_more_diff` adds one extra blank line when the doc
    is older than that many seconds. New docs get a draft status when
    `draft_by_default` is set. Returns True on success, ''/None otherwise.
    """
    if not bucket or not path or not content:
        return
    if not isinstance(bucket, string_types) or not isinstance(
            path, string_types) or not isinstance(content, string_types):
        return
    if not has_bucket(bucket):
        return
    if not is_a_markdown_file(path):
        return
    content = smart_unicode(content)
    old_doc = get_record_by_path(bucket, path=path) or {}
    if not isinstance(old_doc, dict):
        old_doc = {}
    if lines_more_diff:  # after this many seconds, add one extra blank line
        if old_doc and old_doc.get('timestamp'):
            try:
                diff = time.time() - old_doc.get('timestamp')
                if diff > lines_more_diff:
                    lines_to_append += 1
            except TypeError:
                # Fix: was a bare `except: pass`; only a non-numeric
                # timestamp (TypeError on the subtraction) should be
                # silently tolerated here.
                pass
    interval = '\r\n' * abs(to_int(lines_to_append, max_value=10))  # blank-line separator
    if old_doc:
        if get_type_from_record(old_doc) == 'post':  # only post-type docs support append
            # Fix: raw_content may be missing/None; default to '' so the
            # .strip()/.endswith calls below cannot crash.
            old_content = old_doc.get('raw_content') or ''
            if old_content == " ":
                old_content = ""
            if do_not_repeat:
                if reverse:
                    if old_content.strip().startswith(content.strip()):
                        return ""
                else:
                    old_content_s = old_content.strip()
                    appended_content_s = content.strip()
                    if old_content_s.endswith(
                            '\n' + appended_content_s
                    ) or old_content_s == appended_content_s:
                        return ''  # ignore, duplicated content is not processed
            if reverse:  # insert at the head
                new_content = '%s%s' % (content, interval)
                content = '%s%s' % (new_content, old_content)
            else:
                new_content = '%s%s' % (interval, content)
                content = '%s%s' % (old_content, new_content)
        else:
            return
    else:  # new doc
        content = content.strip()
        if draft_by_default:  # new documents default to draft status
            # "\\w" (was "\w"): same runtime value, no invalid-escape warning
            if re.match(u"\\w+[:\uff1a]", content):
                # the user may have declared metadata themselves
                content = "status: draft\n%s" % content
            else:
                now = get_now_from_bucket(bucket)
                content = "date: %s\nstatus: draft\n\n%s" % (now, content)
    sync_file_by_server_side(bucket=bucket, relative_path=path, content=content)
    return True  # done