def create_record_for_a_folder(bucket="", folder_path=""):
    """Create a folder-type record for *folder_path* inside *bucket*.

    Silently does nothing when the bucket name is invalid, the path is
    empty after stripping slashes, or a record already exists at the path.
    """
    if not is_valid_bucket_name(bucket):
        return
    folder_path = folder_path.strip("/")
    if not folder_path:
        return
    # double-check whether the folder already exists
    if has_record_by_path(bucket, folder_path):
        return
    new_id = str(ObjectId())
    folder_doc = {
        "path": folder_path,
        "relative_path": folder_path,
        "is_dir": True,
        "slash_number": folder_path.count("/"),
        "type": "folder",
        "_type": "folder",
        "_order": int(time.time()),
        "title": os.path.split(folder_path)[-1],
        "_id": new_id,
    }
    # record ids are random, so skip the exists-check on write
    hset(bucket, new_id, folder_doc, ignore_if_exists=False)
    after_path_related_record_created(bucket, new_id, folder_doc)
def load_all_posts_visits_from_csv(bucket, csv_file_record):
    """Import visits/visitors counters for markdown posts from a CSV record.

    The CSV record is expected to carry a list of dicts under 'objects';
    each dict should have 'path' plus optional 'visits'/'visitors' fields.
    """
    visits_db_name = get_visits_db_name_for_bucket(bucket)
    # above 5k entries, clear first to avoid excessive redundancy
    if hsize(visits_db_name) > 5000:
        hclear(visits_db_name)
    raw_objects = csv_file_record.get('objects') or []
    if not raw_objects or not isinstance(raw_objects, (list, tuple)):
        return
    # handle at most 3k rows, so one bucket cannot grow large enough
    # to cause performance problems
    for row in raw_objects[:3000]:
        if not isinstance(row, dict):
            continue
        row_path = row.get('path')
        if not row_path or not isinstance(row_path, string_types):
            continue
        row_path = row_path.strip('/').lower()
        if not is_a_markdown_file(row_path):
            continue
        hset(visits_db_name,
             get_visits_key(row_path, field='visits'),
             to_int(row.get('visits'), default_if_fail=0))
        hset(visits_db_name,
             get_visits_key(row_path, field='visitors'),
             to_int(row.get('visitors'), default_if_fail=0))
def pull_domain_from_bucket(bucket, domain):
    """Remove *domain* from the bucket's aggregated domain list ('_domains')."""
    if not isinstance(domain, string_types):
        return
    normalized = domain.lower().strip()
    bound_domains = get_bucket_domains(bucket)
    if normalized not in bound_domains:
        return
    bound_domains.remove(normalized)
    hset("_domains", bucket, bound_domains)
def create_invitation(note=""):
    """Create a new bucket-invitation code and store it under '_bucket_invite'."""
    invite_code = str(ObjectId())
    invite_doc = dict(
        _id=invite_code,
        created_at=time.time(),
        note=note,
    )
    hset('_bucket_invite', key=invite_code, value=invite_doc)
def set_simple_bucket_token(bucket, private_key_md5):
    # Persist the simple token for *bucket*; skip the write when the stored
    # value is already identical (only comparable for short string tokens).
    if not bucket:
        return
    comparable = isinstance(private_key_md5, string_types) and len(private_key_md5) < 100
    if comparable and get_simple_bucket_token(bucket) == private_key_md5:
        # unchanged, avoid a redundant write
        return
    hset('_simple_bucket_token', bucket, private_key_md5)
def set_ssl_cert_for_domain(domain, ssl_key, ssl_cert, by_user=False, bucket=None):
    """Store the SSL key/cert pair for *domain* in the '_domain_ssl' hash."""
    normalized_domain = domain.strip().lower()
    cert_doc = {
        "ssl_key": ssl_key,
        "ssl_cert": ssl_cert,
        "created_at": datetime.datetime.utcnow(),
        "by_user": by_user,
        "bucket": bucket,
    }
    hset('_domain_ssl', normalized_domain, cert_doc)
def use_invitation(code, bucket):
    """Mark invitation *code* as consumed by *bucket*.

    Returns True on success, False when the code is unknown.
    """
    code = code.strip()
    invitation = get_invitation(code)
    if not invitation:
        return False
    invitation["bucket"] = bucket
    invitation["used_at"] = time.time()
    hset('_bucket_invite', key=code, value=invitation)
    return True
def _func(*args, **kwargs):
    # Timing wrapper: call the wrapped `func`, then best-effort record the
    # elapsed seconds into the '_system_recorder' hash.
    t1 = time.time()
    result = func(*args, **kwargs)
    try:
        seconds_cost = str(time.time() - t1)
        # key: millisecond start-timestamp + recorder field name
        key = '%s_%s' % (int(t1 * 1000), self.field)
        hset('_system_recorder', key=key, value=seconds_cost)
    except Exception:
        # recording must never break the wrapped call, but do not use a bare
        # `except:` which would also swallow SystemExit/KeyboardInterrupt
        pass
    return result
def push_domain_to_bucket(bucket, domain, old_domain=None):
    """Append *domain* to the bucket's aggregated domain list ('_domains').

    When *old_domain* is given, it is removed first (domain replacement).
    """
    if not isinstance(domain, string_types):
        return
    if old_domain:
        # the old domain must be removed before the new one is added
        pull_domain_from_bucket(bucket, old_domain)
    normalized = domain.lower().strip()
    current_domains = get_bucket_domains(bucket)
    if normalized in current_domains:
        return
    current_domains.append(normalized)
    hset("_domains", bucket, current_domains)
def fix_records_count():
    # Rebuild the '_records_count' counters from the database contents.
    #hclear('_records_count')
    hlist_count = get_hlist_and_count()
    grand_total = 0
    for db_name, db_count in hlist_count.items():
        if len(db_name) < 32:
            # short names are skipped — presumably system hashes rather than
            # bucket names; verify against bucket naming rules
            continue
        grand_total += db_count
        hset('_records_count', db_name, db_count)
    hset('_records_count', 'all', grand_total)
def set_bucket_private_configs(bucket, configs):
    """Persist private configs for a bucket as JSON.

    Returns True when stored; returns None when *configs* is not a dict,
    the bucket is missing, or the payload exceeds 500k.
    """
    if not isinstance(configs, dict):
        return
    if not has_bucket(bucket):
        return
    serialized = json_dumps(configs)
    if len(serialized) > 500 * 1024:
        # must not exceed 500k
        return
    hset("_bucket_private_configs", bucket, serialized)
    return True
def set_bucket_api_token(bucket, db_name="_bucket_api_token"):
    """Generate and store a fresh API token for *bucket*.

    :param bucket: required bucket name
    :param db_name: hash name, has a default
    :return: new_api_token, or "" when the bucket is invalid or missing
    """
    if not is_valid_bucket_name(bucket) or not has_bucket(bucket):
        return ""
    new_token = get_a_random_api_token()
    hset(db_name, bucket, new_token)
    return new_token
def basic_mark_bucket_to_sync(namespace, bucket, **kwargs):
    """Mark *bucket* as pending sync under *namespace* ('_'-prefixed hash)."""
    if not bucket or not is_valid_bucket_name(bucket) or not has_bucket(bucket):
        return
    if not namespace.startswith('_'):
        namespace = '_' + namespace
    payload = dict(bucket=bucket, date=time.time())
    payload.update(kwargs)
    hset(namespace, bucket, payload, ignore_if_exists=True)
def set_name_by_wechat_user_id(wechat_user_id, name):
    # Store the display name for a wechat user id; returns hset's result,
    # or None when the input is rejected.
    if not isinstance(wechat_user_id, string_types):
        return
    if not isinstance(name, string_types):
        return
    unicode_name = smart_unicode(name)
    if not unicode_name:
        return
    return hset("wechat_names", wechat_user_id, unicode_name)
def bind_bucket_by_wechat(wechat_user_id, bind_code):
    """Bind a wechat account to the bucket referenced by *bind_code*.

    One bucket may hold multiple wechat accounts, but a wechat user maps
    to exactly one bucket. Returns a status-reply string.
    """
    bucket = check_wechat_bind_code(bind_code)
    if not bucket:
        return u'绑定信息有误或者已经超过1小时有效期'
    account_doc = dict(
        name="",
        uid=wechat_user_id,
        date=datetime.datetime.utcnow(),
    )
    # bind under the current bucket's wechat namespace (acts as a list)
    hset(get_bucket_namespace_for_wechat(bucket), wechat_user_id, account_doc)
    # reverse mapping: one wechat_user corresponds to exactly one bucket
    hset("wechat_accounts", wechat_user_id, bucket)
    return get_bucket_bind_status_reply(bucket)
def set_bucket_service_info(bucket, order_id=None, **kwargs):
    """Merge *kwargs* (and optionally *order_id*) into the bucket's service info."""
    if not is_valid_bucket_name(bucket):
        return
    info = get_bucket_service_info(bucket)
    if not info.get("bucket"):
        info["bucket"] = bucket
    if order_id:
        # normalize order_id_list into a plain list and append the new id
        order_id_list = info.get("order_id_list")
        if not isinstance(order_id_list, (list, tuple)):
            order_id_list = []
        elif isinstance(order_id_list, tuple):
            order_id_list = list(order_id_list)
        if order_id not in order_id_list:
            order_id_list.append(order_id)
        info["order_id_list"] = order_id_list
    info.update(kwargs)
    hset("_bucket_info", bucket, info)
def show_avatar(avatar_id):
    # Web view: serve the avatar image for *avatar_id*, backed by gravatar
    # and cached (base64-encoded) in the '_avatar' hash, with 304 handling.
    avatar_id = get_avatar_id(avatar_id)
    avatar_doc = hget('_avatar', avatar_id)
    now = time.time()
    if avatar_doc:
        avatar_date = avatar_doc.get('date')
        avatar_image_content = avatar_doc.get('content')
        to_clear = False
        if not avatar_date:
            to_clear = True
        elif (now - avatar_date) > 5 * 24 * 60 * 60:  # 5days
            to_clear = True
        elif (now - avatar_date) > 1 * 24 * 60 * 60 and not avatar_image_content:  # 1day for empty avatar image
            to_clear = True
        if to_clear:
            # the cached avatar_doc lives at most 5 days
            hdel('_avatar', avatar_id)
            avatar_doc = None
    if not avatar_doc:
        # cache miss: fetch from gravatar and cache locally (base64-encoded);
        # an empty result is cached too (cleared after 1 day, see above)
        avatar_image_content = get_gavatar_image_content(avatar_id) or ''
        if avatar_image_content:
            avatar_image_content = base64.b64encode(avatar_image_content)
        avatar_doc = dict(date=now, content=avatar_image_content)
        hset('_avatar', avatar_id, avatar_doc)
    if not is_doc_modified(avatar_doc, date_field='date'):
        return get_304_response()
    else:
        avatar_image_content = avatar_doc.get('content') or ''
        if avatar_image_content:
            avatar_image_content = base64.b64decode(avatar_image_content)
            response = Response(avatar_image_content, mimetype='image/png')
            set_304_response_for_doc(avatar_doc, response, date_field='date')
            return response
        else:
            # fall back to the default avatar file
            r_response = send_static_file('defaults/avatar.png')
            if r_response:
                return r_response
    # at last
    abort(404)
def update_record(bucket, record_id, **kwargs):
    """Apply field updates onto an existing record.

    Returns True when the record was saved, False otherwise.
    """
    if not record_id:
        return False
    if isinstance(record_id, dict):
        record_id = record_id.get("_id")
    record = get_record(bucket, record_id, force_dict=True)
    if not record:
        return False
    # strip out fields that callers are never allowed to change
    for protected_field in fields_not_allowed_to_update:
        kwargs.pop(protected_field, None)
    if not kwargs:
        return False
    record.update(kwargs)  # merge & persist
    hset(bucket, record_id, record)
    new_order = kwargs.get("_order")
    if isinstance(new_order, (float, int)) and record.get("path"):
        # keep the path-related _order index in sync, specially for image
        update_record_order_value_to_related_db(bucket, record)
    return True
def create_bucket_by_public_key(public_key, init_configs=None, force_to_create=False):
    """Create a bucket — essentially its genesis configs — from a public key.

    Returns True when the bucket exists afterwards, False otherwise.
    """
    if not force_to_create:
        # todo invitation-code validation?
        # non-forced creation requires an invitation to create a bucket
        return False
    bucket = get_bucket_by_public_key(public_key)
    if not bucket:
        return False
    if has_bucket(bucket):
        # already exists: re-creation is not allowed, treat it as created
        return True
    created_at = int(time.time())
    created_date = datetime.datetime.utcfromtimestamp(created_at).strftime('%Y-%m-%d %H:%M:%S UTC')
    bucket_info = dict(
        public_key=public_key,
        created_at=created_at,
        created_date=created_date,
    )
    if init_configs and isinstance(init_configs, dict):
        # merge the caller-provided configs unless the payload exceeds 10k
        if len(json_dumps(init_configs)) <= 10 * 1024:
            init_configs.update(bucket_info)
            bucket_info = init_configs
    # create the init config — it is immutable once written
    hset(bucket, zero_id, bucket_info, ignore_if_exists=True)
    # also register in the global buckets listing (for totals etc.)
    set_bucket_into_buckets(bucket)
    # newly-created buckets get a 30-day validity period
    change_bucket_expired_date(bucket)
    return True
def register_bucket_domain_from_system(bucket, domain, is_admin=False):
    # Register a system-provided sub-domain for a bucket.
    # A bucket is allowed only ONE system-provided sub-domain.
    # Returns None on success, or an error-message string.
    domain = domain.strip().lower()
    if not is_valid_bucket_name(bucket):
        return 'invalid bucket'
    domain_info = get_domain_basic_info(domain, is_admin=is_admin)
    is_system_domain = domain_info.get('is_system_domain', False)
    is_allowed = domain_info.get('allowed', False)
    if is_system_domain and is_allowed:
        r_domain_info = hget('_rdomain', bucket)  # has this bucket already bound a domain?
        parked_domain_info = hget('_domain', domain)  # is this domain already bound by some bucket?
        if parked_domain_info:
            # the domain was registered already
            if parked_domain_info.get('bucket') == bucket:
                return None
            return '%s is used by other bucket' % domain
        if r_domain_info:
            # the bucket has already bound a domain
            master_old_domain = r_domain_info.get('domain')
            if master_old_domain == domain:
                return None
            else:
                # only one system sub-domain per bucket: drop the previous one
                hdel('_rdomain', bucket)
                hdel('_domain', master_old_domain)
                pull_domain_from_bucket(bucket, master_old_domain)  # aggregate domains
        # '_rdomain' is the reverse mapping — one bucket has a single system-level domain?
        domain_doc = dict(bucket=bucket, domain=domain, created_at=time.time())
        hset('_domain', domain, domain_doc)
        hset('_rdomain', bucket, domain_doc)
        push_domain_to_bucket(bucket, domain)  # aggregate domains
        return None
    else:
        return '%s is not allowed for bucket:%s' % (domain, bucket)
def set_bucket_configs(bucket, configs, config_type='site', by_system=False):
    """Persist one of the bucket's config docs.

    config_type in site, user, files, pages, ...
    Returns True when stored, False when ignored or invalid.
    """
    if not configs or not isinstance(configs, dict):
        return False  # ignore
    if not has_bucket(bucket):
        return False
    if not by_system and config_type in config_names_not_allowed_set_by_user:
        # not driven by the system AND the config type is not user-editable: ignore
        return False
    if config_type == 'user':
        # hash the password
        re_configs_for_user(configs)
    if config_type in ['site'] and isinstance(configs, dict) and not configs.get('date'):
        configs['date'] = int(time.time())
    # set mtime
    if config_type in ['pages', 'site']:
        configs['mtime'] = time.time()
    configs['_config_type'] = config_type
    config_doc_id = bucket_config_doc_id_names.get(config_type)
    if not config_doc_id:
        return False
    # fix: the original re-checked `if config_doc_id:` here with an
    # unreachable `else: return False`; the guard above already handles it
    hset(bucket, config_doc_id, configs)
    # todo how should doc-id-keyed changes be synced? query by a fixed region?
    set_bucket_into_buckets(bucket)
    return True
def register_bucket_independent_domain(bucket, domain):
    # Register an independent (custom) domain. Precondition: the domain is
    # already parked on the current node and the necessary checks were done.
    # Returns None on success, or an error-message string.
    domain = domain.strip().lower()
    if not is_valid_domain(domain):
        return 'domain format error or not supported'
    if not is_valid_bucket_name(bucket):
        return 'invalid bucket'
    if not has_bucket(bucket):
        return 'current node does not have bucket:%s' % bucket
    bucket = bucket.strip()
    old_domain_info = hget('_domain', domain) or {}
    old_matched_bucket = old_domain_info.get('bucket')
    if old_matched_bucket == bucket:
        # already registered for this bucket, nothing to do
        return None
        #return 'registered already for this bucket, no need to change'
    if domain == "thisisadomainfortest.com":
        # for test only
        bucket_in_domain_text = bucket
    else:
        bucket_in_domain_text = get_domain_text_record(domain)
    if bucket_in_domain_text == bucket:
        # e.g. A registered it before but the domain now belongs to B;
        # once B updates the TXT record, B can re-register it.
        # at last, create or modify
        hset('_domain', domain, dict(
            bucket=bucket,
            created_at=time.time(),
        ))
        push_domain_to_bucket(bucket, domain)  # aggregate domains
        return None  # done
    else:
        if bucket_in_domain_text:
            if not is_valid_bucket_name(bucket_in_domain_text):
                return 'invalid bucket format in TXT record: %s' % bucket_in_domain_text
            else:
                return 'TXT record is not matched to %s' % bucket
        else:
            return 'should set TXT record for domain first'
def create_record_by_sync(bucket, record, check_bucket=False):
    """Insert a record arriving via node sync.

    Returns an error string on failure; None when the record is stored
    (or silently skipped because the id already exists).
    """
    if not isinstance(record, dict):
        return 'record is not a dict'
    record_id = record.pop('_id', None)
    if not record_id:
        return 'record_id is missing'
    if not is_object_id(record_id):
        return 'record_id is not a valid ObjectID'
    error_info = get_record_data_error_info(record)
    if error_info:
        return error_info
    # optionally ensure the current node actually owns the bucket
    if check_bucket and not has_bucket(bucket):
        return 'no bucket matched'
    py_record_data = ssdb_data_to_py_data(record)
    saved = hset(bucket, record_id, py_record_data, ignore_if_exists=True)
    if saved:
        after_record_created(bucket, py_record_data, object_id=record_id)
def after_path_related_record_created(bucket, record_id, record_data):
    # Maintain every path-related index after a record with a path is created:
    # parent-folder auto-creation, url<->path mapping, path->id index,
    # order values, slash-depth zset and tags info.
    path = get_path_from_record(record_data)
    if not path:
        return
    path = path.strip('/')
    if not path:
        return
    #original_path = to_unicode(record_data.get('path').strip('/'))
    # if the parent does not exist, it has to be created as well
    parent_path = os.path.split(path)[0].strip("/")
    if parent_path and not has_record_by_path(bucket, parent_path):
        # create the missing parent folder; this recurses upwards so every
        # ancestor folder gets created
        real_path = get_path_from_record(record_data, is_lower=False)
        real_parent_path = os.path.split(real_path)[0].strip("/")  # letter case preserved
        create_record_for_a_folder(bucket, real_parent_path)
    slash_number = path.count('/')
    bucket_name_for_path = get_bucket_name_for_path(bucket)
    bucket_name_for_url = get_bucket_name_for_url(bucket)
    bucket_name_for_slash = get_bucket_name_for_slash(bucket)
    #bucket_name_for_order = get_bucket_name_for_order_by_record(bucket, record_data)
    to_mark_object_id = False
    data_type = get_data_type(record_data)
    if data_type == 'post' and record_data.get('status', 'public') != 'public':
        # non-public post: mark the object_id with a leading '#'
        to_mark_object_id = True
    # index used for url matching
    url_path = get_url_path(record_data)
    # two-way mapping: besides finding path by url we can also find url by
    # path, because deleting a record only knows the path, not the url_path
    if url_path:
        hset(bucket_name_for_url, url_path, path)
        if path != url_path:
            hset(bucket_name_for_url, path, url_path)
    # build the path -> object_id relation
    if to_mark_object_id:
        # hidden from plain list queries, but still reachable by direct path lookup
        value = '#%s' % record_id
    else:
        value = record_id
    # also record version & size as: record_id,size,version
    size = record_data.get('size') or 0
    version = record_data.get('version') or ''
    if record_data.get('is_dir'):
        version = 'folder'
    value = '%s,%s,%s' % (value, size, version)
    hset(bucket_name_for_path, path, value)
    # set the ordering; without order logic, get_data(type) cannot actually
    # fetch the content.
    # for a non-public post, force order=0 so that under the default -date
    # sort it ends up last when paths are paginated externally
    update_record_order_value_to_related_db(bucket, record_data, force_value=0 if to_mark_object_id else None)
    # bind the slash number to the path; only the specified types handle slash
    if data_type in BUCKET_RECORD_SLASH_TYPES:
        zset(bucket_name_for_slash, path, score=slash_number)
    update_tags_info_for_posts(bucket=bucket, record_data=record_data)  # files and posts info
def create_record(bucket, record_data, avoid_repeated=True, auto_id=True, file_content=None, return_record=False):
    # make sure the bucket is correct before create record
    # Returns an error-info string on failure, None (or the record when
    # return_record=True) on success.
    # avoid_repeated: refuse a record whose body equals the latest record's.
    error_info = get_record_data_error_info(record_data)
    if error_info:
        return error_info
    py_record_data = ssdb_data_to_py_data(record_data)
    byte_record_data = py_data_to_ssdb_data(record_data)
    if auto_id:
        object_id = str(ObjectId())
        if '_id' not in py_record_data and isinstance(py_record_data, dict):
            # auto-fill _id into record data when necessary
            py_record_data['_id'] = object_id
    else:
        object_id = py_record_data.get('_id') or py_record_data.get('id')
        avoid_repeated = False  # explicit id given: skip the repeated check
        if not object_id:
            return 'auto_id disabled, should pass id in the record data'
    if avoid_repeated:
        # avoid duplicating the latest record
        record_md5 = to_md5(byte_record_data)
        if not allowed_to_create_record_in_bucket(bucket, record_md5):
            error_info = 'current data is repeated to latest record @%s' % bucket
            if isinstance(py_record_data, dict):
                path_in_record = py_record_data.get('path')
                if path_in_record:
                    error_info += smart_unicode(', the path is %s' % path_in_record)
            return error_info
        else:
            update_bucket_last_record_md5(bucket, record_md5)
    # triggered when '_auto_clean_bucket' in record_data and is `True`;
    # a delete means stop here ('break'), otherwise it is a full update —
    # the new record replaces the old one
    auto_clean_status = auto_clean_record_before_handle_path_related_record(bucket, py_record_data)
    if auto_clean_status == 'break':
        return
    # store pre_object_id:
    # fetch the previous object's id, and (if the data is a dict) store it
    # in the '_pre_id' field
    pre_object_id = get_bucket_max_id(bucket)
    if pre_object_id:
        if isinstance(py_record_data, dict):
            py_record_data['_pre_id'] = pre_object_id
    # store the record and update the bucket's max_id info;
    # record_id is randomly generated and essentially never repeats, hence
    # ignore_if_exists=False to skip one existence check
    hset(bucket, object_id, py_record_data, ignore_if_exists=False)
    after_record_created(bucket, py_record_data, object_id=object_id, should_update_bucket_max_id=True)
    # refresh the buckets info, marking this bucket as just updated
    set_bucket_into_buckets(bucket)
    if py_record_data.get("path"):
        # path-related records can be deleted, so last_record_id is stored
        # separately; otherwise hget_max alone would be enough
        set_bucket_last_record_id(bucket, object_id)
    if file_content and not py_record_data.get("raw_content"):
        # a file content to store was specified and the record carries no
        # raw_content field — store the file itself
        storage.accept_upload_file_from_client(bucket, py_record_data, get_raw_content_func=file_content)
    if py_record_data.get("path") == "settings.json" and py_record_data.get("raw_content"):
        # best-effort: apply settings.json content as the site configs
        try:
            site_settings = json_loads(py_record_data.get("raw_content"))
            if isinstance(site_settings, dict):
                set_bucket_configs(bucket, site_settings, config_type="site")
        except:
            pass
    if return_record:
        return py_record_data
def update_bucket_max_id(bucket, max_id):
    """Record the bucket's max record id.

    Only touched when a brand-new record is generated; plain node-to-node
    sync does not bump the max_id.
    """
    hset('_bucket_max_id', bucket, max_id)
def update_bucket_delta_id(bucket, delta_id):
    """Record the bucket's delta id — used during synchronization."""
    hset('_bucket_delta_id', bucket, delta_id)
def set_buckets_cursor_for_remote_node(node, cursor):
    """Remember the buckets-iteration cursor for a remote *node*."""
    if cursor is None:
        return
    hset('_remote_buckets_cursor', node, cursor)
def set_bucket_last_record_id_computed(bucket, record_id):
    """Store the last record id for which files_info was computed.

    files_info requires computation, so it is only calculated on demand —
    i.e. when the client fetches the files information.
    """
    if not is_object_id(record_id):
        return
    hset("buckets_file_cursor_computed", bucket, record_id)
def set_bucket_last_record_id(bucket, record_id):
    """Store the bucket's last (path-related) record id.

    Path-related records can be deleted, so the last record id is tracked
    separately; otherwise a plain hget_max would suffice.
    """
    if not is_object_id(record_id):
        return
    hset("buckets_file_cursor", bucket, record_id)