Пример #1
0
def after_path_related_record_deleted(bucket, record_data):
    """Remove every path-keyed index entry of a deleted record.

    Nothing happens when no path can be read from ``record_data``.
    Otherwise the path is removed (via ``hdel`` / ``zdel``) from the path
    index, from the order index when one applies to the record, from the
    slash index and from both directions of the url index. ``storage`` is
    notified last.
    """
    record_path = get_path_from_record(record_data)
    if not record_path:
        return

    path_index = get_bucket_name_for_path(bucket)
    url_index = get_bucket_name_for_url(bucket)
    slash_index = get_bucket_name_for_slash(bucket)
    order_index = get_bucket_name_for_order_by_record(bucket, record_data)

    # Forget the entry stored under the path itself.
    hdel(path_index, record_path)

    # Forget the ordering entry kept for this record's data type, if any.
    if order_index:
        zdel(order_index, record_path)

    # Forget the slash entry.
    zdel(slash_index, record_path)

    # The url index holds two entries (url_path keyed and path keyed);
    # when the record carries no url_path, look it up through the path.
    url_path = get_url_path(record_data)
    if not url_path:
        url_path = hget(url_index, record_path)
    if url_path:
        hdel(url_index, url_path)
    hdel(url_index, record_path)

    # File removal itself is delegated to the storage backend.
    storage.when_record_deleted(bucket, record_data)
Пример #2
0
def get_post_with_greed(url_body, parent_doc=None):
    """Resolve ``url_body`` to a post doc, trying progressively looser lookups.

    Lookup order:
      1. the url (query string / fragment removed) through ``get_doc``;
      2. for markdown urls with a dict ``parent_doc`` (argument, or the doc
         found in the request): the file name relative to the parent doc's
         folder, then relative to the current data root;
      3. a keyword search on the ``name`` variable read from ``url_body``;
      4. for markdown urls, a keyword search on the bare file name.

    Returns whatever the first successful lookup returned, otherwise the
    falsy result of the last lookup attempted.
    """
    post_url = re.split("[?#]", url_body)[0]
    data_namespace = get_data_namespace()
    found = data_namespace.get_doc(post_url)
    data_root = get_current_data_root()
    parent_doc = parent_doc or get_doc_in_request()

    if not found and is_a_markdown_file(post_url) and parent_doc and isinstance(parent_doc, dict):
        # Keep only what follows the first "/post/" (the whole url when absent).
        filename = post_url.split("/post/", 1)[-1]
        parent_path = get_path_from_record(parent_doc)
        if parent_path:
            parent_folder = os.path.dirname(parent_path)
            if parent_folder:
                candidate = "%s/%s" % (parent_folder.strip("/"), filename.strip("/"))
                found = data_namespace.get_doc_by_path(candidate)

        # Retry with the data root as the prefix.
        if data_root and not found:
            candidate = "%s/%s" % (data_root, filename.strip("/"))
            found = data_namespace.get_doc_by_path(candidate)

    if found:
        return found

    # Fall back to keyword search.
    bucket = get_bucket_in_request_context()
    post_name = (get_get_var(url_body, "name") or "").strip()
    if post_name:
        if "." in post_name:
            post_name = os.path.splitext(post_name)[0]
        found = get_one_post_by_es(bucket, keywords=post_name, under=data_root)
    if not found and is_a_markdown_file(post_url):
        # Search by the bare file name, unless that was just tried.
        bare_name = get_just_name(post_url)
        if bare_name != post_name:
            found = get_one_post_by_es(bucket, keywords=bare_name, under=data_root)
    return found
Пример #3
0
def get_doc_url_for_template_api(doc,
                                 url_prefix,
                                 url_root=None,
                                 hit_url_path=False):
    """Build the ``/<url_prefix>/<tail>`` url of ``doc``.

    The tail is, in order of preference:
      * the doc's own ``url_path`` field, only when ``hit_url_path`` is set
        (this differs from ``post.url``, which behaves as if it had a
        dynamic url prefix);
      * the doc path relative to ``url_root``, compared lower-cased;
      * the doc path as returned by ``get_path_from_record``.

    Returns ``""`` for a missing or non-dict doc, a non-string
    ``url_prefix``, a truthy non-string ``url_root`` or a doc without path.
    """
    if not (doc and isinstance(doc, dict)):
        return ""
    if not isinstance(url_prefix, string_types):
        return ""
    if url_root and not isinstance(url_root, string_types):
        return ""

    prefix = url_prefix.strip("/")
    doc_path = get_path_from_record(doc)
    if not doc_path:
        return ""

    if hit_url_path:
        custom_tail = smart_unicode(doc.get("url_path") or "").strip("/")
        if custom_tail:
            return "/%s/%s" % (prefix, custom_tail)

    tail = doc_path
    if url_root:
        # url_root is known to be a string here (validated above).
        relative_tail = get_relative_path(doc_path.lower(),
                                          url_root.lower(),
                                          return_name_if_fail=False)
        if relative_tail:
            tail = relative_tail
    return "/%s/%s" % (prefix, tail)
Пример #4
0
def get_wiki_url_for_doc(wiki_root, doc):
    """Return the ``/wiki/...`` url of ``doc`` relative to ``wiki_root``.

    Posts map to ``/wiki/post/<relative path>``, every other record type to
    ``/wiki/category/<relative path>``.

    Returns ``""`` when ``wiki_root`` is not a string, ``doc`` is not a
    dict, the doc has no path, or no relative path can be computed.
    """
    if not isinstance(wiki_root, string_types) or not isinstance(doc, dict):
        return ""
    doc_type = get_type_from_record(doc)
    doc_path = get_path_from_record(doc)
    if not doc_path:
        # A record without a path used to raise AttributeError on .lower().
        return ""
    # The doc path is compared lower-cased, so the root must be lower-cased
    # as well; otherwise a mixed-case root could never match.
    wiki_root = wiki_root.strip("/").lower()
    relative_path = get_relative_path(doc_path.lower().strip("/"),
                                      wiki_root,
                                      return_name_if_fail=False)
    if not relative_path:
        return ""
    if doc_type == "post":
        return "/wiki/post/%s" % relative_path
    return "/wiki/category/%s" % relative_path
Пример #5
0
def update_path_related_when_record_changed(bucket, record_id, record_data):
    """Refresh the path-related indexes after a record was written.

    NOTE: any record that carries a path can be updated (or removed) through
    this hook; leave the path out of a record that must not be touched.

    Ignored inputs: non-dict ``record_data``, empty ``bucket`` or
    ``record_id``, and records without a path. Always returns ``None``.
    """
    if not isinstance(record_data, dict):
        return
    if not (bucket and record_id):
        return

    marked_deleted = record_data.get('is_deleted', False)

    record_path = get_path_from_record(record_data)
    if not record_path:
        return

    if not marked_deleted:
        # Create or update.
        after_path_related_record_created(bucket, record_id, record_data)
        return

    # Deletions normally arrive as an appended record flagged as deleted,
    # although a record may also have been removed outright.
    after_path_related_record_deleted(bucket, record_data=record_data)
Пример #6
0
def update_record_order_value_to_related_db(bucket, record_data, force_value=None):
    """Store the ordering score of a record under its path.

    The score comes from the record's ``_order`` (or ``order``) field,
    converted with ``to_float``. Records of type file / post / folder that
    have no usable order fall back to the current time. A non-``None``
    ``force_value`` overrides whatever was computed. Nothing is written when
    the record has no path, no score results, or no order bucket applies.

    Records without an order entry cannot be fetched through the typed
    listing logic, which is why the fallback exists.
    """
    path = (get_path_from_record(record_data) or "").strip('/')
    if not path:
        return

    order_bucket = get_bucket_name_for_order_by_record(bucket, record_data)
    data_type = get_data_type(record_data)

    score = record_data.get("_order") or record_data.get("order")
    if not score and data_type in ("file", "post", "folder"):
        # Should not normally happen for these types.
        score = time.time()
    if score is not None:
        score = to_float(score, default_if_fail=None)
    if force_value is not None:
        score = force_value

    if score is None or not order_bucket:
        return
    zset(order_bucket, path, score)
Пример #7
0
def render_as_static_file_for_farbox_bucket(path):
    """Serve the record found at ``path`` in the request's bucket as a file.

    ``path`` defaults to ``index.html`` when empty or ``/``. Returns ``None``
    when there is no bucket in the request context or no record is found
    (``favicon.ico`` additionally falls back to ``_direct/favicon.ico``).

    Responses:
      * 404 for a csv file under ``_data/`` unless the bucket is logged in;
      * the compiled content, when the record has both ``compiled_type`` and
        ``compiled_content``;
      * otherwise the storage download response, after the anti hot-linking
        check (404 for requests referred from another host whose path
        contains a dot; ``favicon.ico`` is exempt).
    """
    if not path or path == "/":
        path = "index.html"
    bucket = get_bucket_in_request_context()
    if not bucket:
        return
    record = get_record_by_path(bucket, path)
    if path == "favicon.ico" and not record:
        # Bitcron compatibility.
        record = get_record_by_path(bucket, "_direct/favicon.ico")
    if not record:
        return

    record_path = get_path_from_record(record)
    if record_path:
        # Accept the path with or without a leading slash: the previous
        # check only matched "/_data/...", so a slash-less record path
        # skipped the csv protection entirely.
        normalized_path = record_path.lstrip("/").lower()
        if normalized_path.startswith("_data/"):
            ext = os.path.splitext(normalized_path)[-1].strip(".")
            if ext == "csv" and not is_bucket_login(bucket):
                return abort(
                    404, "csv under /_data is not allowed to download directly")

    set_context_value_from_request("is_static_file", True)

    compiled_type = record.get('compiled_type')
    compiled_content = record.get('compiled_content')
    if compiled_type and compiled_content:
        # compiled_type is truthy here, so it is always the mimetype; the
        # content is converted to bytes once.
        return send_file(io.BytesIO(to_bytes(compiled_content)),
                         mimetype=compiled_type)

    # Anti hot-linking check comes first.
    site_configs = get_bucket_site_configs(bucket)
    anti_theft_chain = site_configs.get("anti_theft_chain", True)
    if anti_theft_chain and request.path.strip('/') in ['favicon.ico']:
        anti_theft_chain = False
    if anti_theft_chain and request.referrer:
        refer_host = get_host_from_url(request.referrer)
        if refer_host != request.host and "." in request.path:
            return abort(404, "this url is not allowed for outside")

    return storage.get_download_response_for_record(bucket=bucket,
                                                    record_data=record,
                                                    try_resized_image=True)
Пример #8
0
def compute_content_with_referred_docs(post_doc,
                                       html_content=None,
                                       show_date=True,
                                       url_prefix=None,
                                       url_root=None,
                                       hit_url_path=False):
    """Rewrite the ``<a href=...>`` anchors of a post's html.

    The html is ``html_content`` when given, else the doc's ``content``
    field. Each anchor match (including an optional tag directly before it
    and an optional closing or self-closing tag directly after it) is handed
    to ``re_sub_for_referred_docs`` together with the keyword options of
    this call. Returns ``""`` when the doc has no path.
    """
    if not get_path_from_record(post_doc):
        return ""

    source_html = html_content or post_doc.get("content") or ""
    anchor_pattern = re.compile(
        r"""(<[^<>]+>)?<a [^<>]*?href=['"](.*?)['"][^<>]*?>.*?</a>(</\w+>|<\w+ */>)?""")
    replace_anchor = curry(
        re_sub_for_referred_docs,
        show_date=show_date,
        post_doc=post_doc,
        url_prefix=url_prefix,
        url_root=url_root,
        hit_url_path=hit_url_path,
    )
    return anchor_pattern.sub(replace_anchor, source_html)
Пример #9
0
def auto_clean_record_before_handle_path_related_record(bucket, record_data):
    """Purge the previous record of a path when the client asked for it.

    Only applies to dict records that carry a path and a truthy
    ``_auto_clean_bucket`` flag:

      * deletion: the old record and its related data are removed directly,
        without appending a redundant record, and ``'break'`` is returned so
        the caller stops further processing;
      * update: the previous record with the same path is removed first; the
        caller then goes on to write the new one (returns ``None``).

    Every other input is ignored and returns ``None``.
    """
    if not isinstance(record_data, dict):
        return  # ignore

    marked_deleted = record_data.get('is_deleted')
    record_path = get_path_from_record(record_data)
    wants_auto_clean = record_data.get('_auto_clean_bucket', False)
    if not wants_auto_clean or not record_path:
        return

    if not marked_deleted:
        # Remove the existing record first; the new one is written later.
        delete_path_related_record_by_path(bucket,
                                           record_path,
                                           continue_to_create_record=True)
        return

    delete_path_related_record_by_path(bucket, record_path, record_data)
    return 'break'  # nothing further should run
Пример #10
0
 def get_referred_back_docs(self, doc=None):
     """Return the records that refer back to ``doc``.

     ``doc`` defaults to the current post (looked up without raising a 404).
     An empty list is returned when there is no doc at all.
     """
     target_doc = doc or self.get_current_post(auto_raise_404=False)
     if target_doc:
         target_path = get_path_from_record(target_doc)
         return get_records_by_post_path_back_referred(self.bucket, target_path)
     return []
Пример #11
0
def after_path_related_record_created(bucket, record_id, record_data):
    """Register a created or updated record in the path-related indexes.

    Steps, in order:
      1. make sure a folder record exists for the parent path;
      2. map url_path <-> path in the url index (both directions);
      3. store ``"<record_id>,<size>,<version>"`` under the path; the id is
         prefixed with ``#`` for posts whose status is not ``public``;
      4. store the ordering score (forced to 0 for such non-public posts);
      5. store the slash count for the types in ``BUCKET_RECORD_SLASH_TYPES``;
      6. refresh the tags info through ``update_tags_info_for_posts``.

    Records without a path are ignored.
    """
    path = (get_path_from_record(record_data) or "").strip('/')
    if not path:
        return

    # Create the parent folder record when it does not exist yet.
    parent_path = os.path.split(path)[0].strip("/")
    if parent_path and not has_record_by_path(bucket, parent_path):
        # Use the path fetched with is_lower=False for the folder record.
        cased_path = get_path_from_record(record_data, is_lower=False)
        cased_parent_path = os.path.split(cased_path)[0].strip("/")
        create_record_for_a_folder(bucket, cased_parent_path)

    slash_count = path.count('/')
    path_index = get_bucket_name_for_path(bucket)
    url_index = get_bucket_name_for_url(bucket)
    slash_index = get_bucket_name_for_slash(bucket)

    data_type = get_data_type(record_data)
    # Non-public posts get their object id marked with a leading '#'.
    is_hidden_post = (data_type == 'post'
                      and record_data.get('status', 'public') != 'public')

    # Url index: url -> path for routing, and path -> url because only the
    # path is known when a record gets deleted.
    url_path = get_url_path(record_data)
    if url_path:
        hset(url_index, url_path, path)
        if path != url_path:
            hset(url_index, path, url_path)

    # Path index value: record_id,size,version. A '#'-marked id stays out
    # of normal listings but can still be found by a direct path lookup.
    id_field = '#%s' % record_id if is_hidden_post else record_id
    size = record_data.get('size') or 0
    version = record_data.get('version') or ''
    if record_data.get('is_dir'):
        version = 'folder'
    hset(path_index, path, '%s,%s,%s' % (id_field, size, version))

    # Ordering: without an order entry the typed listing cannot return the
    # record. Non-public posts are forced to 0 so that a "-date" pagination
    # puts them last.
    update_record_order_value_to_related_db(
        bucket, record_data, force_value=0 if is_hidden_post else None)

    # Slash count, only for the configured record types.
    if data_type in BUCKET_RECORD_SLASH_TYPES:
        zset(slash_index, path, score=slash_count)

    # Files and posts info.
    update_tags_info_for_posts(bucket=bucket, record_data=record_data)
Пример #12
0
def show_albums_as_sub_site():
    """Render the built-in album pages served under ``/album``.

    Returns ``None`` (request not handled here) when there is no bucket in
    the request context, the request path is not ``album`` or below it, the
    path looks like a direct image url, or the site has no ``albums_root``.

    ``/album`` lists the folders found under the albums root; any deeper
    path lists the images of the matching sub folder. Sorting comes from the
    site configs and is restricted to ``date`` / ``-date``.
    """
    bucket = get_bucket_in_request_context()
    if not bucket:
        return

    request_path = get_request_path().strip("/")
    if not re.match("album(/|$)", request_path):
        return
    has_dot = "." in request_path
    if has_dot and guess_type(request_path, default_type="").startswith("image/"):
        # Probably a direct image url; do not let the album page swallow it.
        return

    site_configs = get_bucket_site_configs(bucket)
    albums_root = smart_unicode(site_configs.get("albums_root", ""))
    if not albums_root:
        return
    albums_root = albums_root.strip("/")  # todo: allow a bare "/" root?
    home_sort = site_configs.get("albums_home_sort", "-date")
    items_sort = site_configs.get("album_items_sort", "-date")

    page_title = site_configs.get("albums_title") or get_just_name(
        albums_root, for_folder=True)

    if re.match("album/?$", request_path):
        # Albums home: list the folders.
        doc_type, doc_sort, under = "folder", home_sort, albums_root
    else:
        # One album: list its images.
        doc_type, doc_sort = "image", items_sort
        sub_path = request_path.split("/", 1)[-1].strip("/")
        under = "%s/%s" % (albums_root, sub_path)
        folder_doc = get_record_by_path(bucket=bucket, path=under)
        if folder_doc:
            page_title = folder_doc.get("title") or get_just_name(
                folder_doc.get("path"), for_folder=True)
        else:
            page_title = get_just_name(under, for_folder=True)

    if doc_sort not in ["date", "-date"]:
        doc_sort = "-date"

    docs = Data.get_data(path=under,
                         type=doc_type,
                         limit=15,  # todo: make configurable?
                         level=1,
                         sort=doc_sort,
                         pager_name='album_docs_pager',
                         exclude='default')

    if doc_type == "folder":
        lowered_root = albums_root.lower()
        for doc in docs:
            doc_path = get_path_from_record(doc, is_lower=True)
            album_path = doc_path.replace(lowered_root, "", 1).strip("/")
            doc["album_url"] = "/album/%s" % album_path

    return render_api_template(
        "builtin_theme_album_waterfall.jade",
        docs=docs,
        page_title=page_title,
    )
Пример #13
0
def _drop_path_from_reverse_index(paths_map, reverse_map, lower_path):
    # Forget lower_path entirely: its own entry in paths_map and its
    # membership in every reverse_map list it was registered under.
    old_keys = paths_map.pop(lower_path, None)
    if not isinstance(old_keys, (list, tuple)):
        return
    for key in old_keys:
        key = smart_unicode(key)
        # Plain lookup (not setdefault) so no empty entry gets created.
        members = reverse_map.get(key)
        if not isinstance(members, list) or lower_path not in members:
            continue
        members.remove(lower_path)
        if not members:
            reverse_map.pop(key, None)


def _sync_path_in_reverse_index(paths_map, reverse_map, lower_path, keys):
    # Make both maps reflect that lower_path now owns exactly `keys`.
    if keys:
        paths_map[lower_path] = keys
    else:
        # Nothing owned any more: drop the entry instead of leaving the
        # previous (stale) list behind.
        paths_map.pop(lower_path, None)
    for key in keys:
        members = reverse_map.setdefault(key, [])
        if lower_path not in members:
            members.append(lower_path)
    # Unregister the path from keys it no longer owns and collect the keys
    # left empty; they are removed afterwards so the dict is not resized
    # while it is being iterated.
    empty_keys = []
    for key, members in reverse_map.items():
        if isinstance(members, list) and lower_path in members and key not in keys:
            members.remove(lower_path)
        if not members:
            empty_keys.append(key)
    for key in empty_keys:
        reverse_map.pop(key, None)


def update_post_tags_words_info(bucket, record_data):
    """Update the bucket-wide posts info (word count, tags, links) for a post.

    Only markdown records outside ``_nav/`` that carry a path are handled.
    The posts config is loaded, adjusted and saved back with
    ``set_bucket_configs(..., config_type='posts')``. Its layout is::

        {
          'text_words': <total words of the counted posts>,
          'words': {path: text_words},
          'tags':  {'paths': {path: [tag, ...]}, 'tags':  {tag: [path, ...]}},
          'links': {'paths': {path: [link, ...]}, 'links': {link: [path, ...]}},
        }

    Deleted, draft and private posts are removed from every map; any other
    post has its entries replaced by the values of ``record_data``.
    """
    path = get_path_from_record(record_data)
    if not path:
        # Checked before any use of the path: a record without a path used
        # to crash on path.lower().
        return
    if not is_a_markdown_file(path):
        return
    lower_path = path.lower().lstrip('/')
    if lower_path.startswith('_nav/'):
        return

    posts_info = get_bucket_posts_info(bucket) or {}
    bucket_text_words = to_int(posts_info.get('text_words') or 0,
                               default_if_fail=0)

    tags_info = posts_info.setdefault('tags', {})
    tags_info_tags = tags_info.setdefault('tags', {})
    tags_info_paths = tags_info.setdefault('paths', {})

    links_info = posts_info.setdefault("links", {})
    links_info_links = links_info.setdefault("links", {})
    links_info_paths = links_info.setdefault("paths", {})

    words_info = posts_info.setdefault('words', {})  # {path: text_words}

    is_deleted = record_data.get('is_deleted', False)
    post_status = record_data.get('status') or 'public'
    post_tags = record_data.get('tags') or []
    if not isinstance(post_tags, (list, tuple)):
        post_tags = []

    post_doc_links, _wiki_tags = get_linked_docs_from_markdown_content(
        path,
        record_data.get("raw_content"),
        md_link_abs_check_func=partial(has_record_by_path, bucket))
    if not isinstance(post_doc_links, (list, tuple)):
        post_doc_links = []

    text_words = to_int(record_data.get('text_words'), default_if_fail=0)

    # Subtract the words previously counted for this post so that they are
    # not counted twice.
    old_text_words = to_int(words_info.get(lower_path), default_if_fail=0)
    if old_text_words:
        bucket_text_words -= old_text_words

    if is_deleted or post_status in ['draft', 'private']:
        # Deleted / draft-like posts are not counted: remove all their data.
        words_info.pop(lower_path, None)
        _drop_path_from_reverse_index(tags_info_paths, tags_info_tags,
                                      lower_path)
        _drop_path_from_reverse_index(links_info_paths, links_info_links,
                                      lower_path)
    else:
        bucket_text_words += text_words
        words_info[lower_path] = text_words
        _sync_path_in_reverse_index(tags_info_paths, tags_info_tags,
                                    lower_path, post_tags)
        _sync_path_in_reverse_index(links_info_paths, links_info_links,
                                    lower_path, post_doc_links)

    if bucket_text_words < 0:
        bucket_text_words = 0

    posts_info['text_words'] = bucket_text_words

    set_bucket_configs(bucket, configs=posts_info, config_type='posts')