Example #1
def delete_sync_log(root, app_name):
    sync_folder = join(root, '.sync')
    log_path = join(sync_folder, '%s.log' % app_name)
    try:
        delete_file(log_path, to_trash=False)
    except:
        pass
Example #2
def get_sync_cursor(root, app_name='farbox_bucket'):
    sync_folder = join(root, '.sync')
    cursor_path = join(sync_folder, '%s.cursor' % app_name)
    if os.path.isfile(cursor_path):
        with open(cursor_path) as f:
            return f.read()
    return ''
Example #3
def delete_sync_cursor(root, app_name):
    sync_folder = join(root, '.sync')
    cursor_path = join(sync_folder, '%s.cursor' % app_name)
    try:
        delete_file(cursor_path, to_trash=False)
    except:
        pass
Example #4
def get_path_with_dot_allowed(root, *keywords):
    possible_paths = []
    for keyword in keywords:
        possible_paths.append(join(root, '.%s' % keyword))
        possible_paths.append(join(root, keyword))
    path = None  # by default
    for path in possible_paths:
        if os.path.exists(path):
            return path
    return path
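A minimal usage sketch (hypothetical paths; assumes the function above is in scope). The dotted candidate is checked first; if no candidate exists, the last one is still returned, which is why callers such as sync_site_folder_simply below guard the result with os.path.isdir:

# prefers '/site/.template', then '/site/template';
# if neither exists, '/site/template' (the last candidate) is returned anyway
template_folder = get_path_with_dot_allowed('/site', 'template')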
Example #5
def sync_find_files_to_delete(root_path, app_name, as_dict=False):
    sync_data_folder = get_sync_data_folder(root_path, app_name)
    if not os.path.isdir(sync_data_folder):  # never synced before
        return []
    files = sync_loop_local_filesystem(root_path,
                                       app_name=app_name,
                                       check_md5=False)  # same_path already
    data_filenames = os.listdir(sync_data_folder)
    old_file_paths = []
    old_dir_paths = set()
    for data_filename in data_filenames:
        data_filepath = join(sync_data_folder, data_filename)
        try:
            with open(data_filepath) as f:
                data = json.loads(f.read())
                filepath = data.get('filepath')
                is_dir = data.get('is_dir', False)
                if data.get('is_relative'):
                    filepath = join(root_path, filepath)
                if filepath:
                    filepath = same_slash(filepath)
                    old_file_paths.append(filepath)
                    if is_dir:
                        old_dir_paths.add(filepath)

        except:
            pass
    _filepaths_to_delete = list(set(old_file_paths) - set(files))

    # put folder-type paths last, so files are deleted before their parent folders
    filepaths_to_delete = []
    dirs_to_delete = []
    for path in _filepaths_to_delete:
        # TODO: try to detect whether the path lives on iCloud
        is_dir = path in old_dir_paths
        if not is_dir:
            filepaths_to_delete.append(path)
        else:
            dirs_to_delete.append(path)
    filepaths_to_delete += dirs_to_delete

    if as_dict:
        filepaths_to_delete_as_dict = []
        for filepath in filepaths_to_delete:
            is_dir = filepath in old_dir_paths
            filepaths_to_delete_as_dict.append(
                dict(path=filepath, filepath=filepath, is_dir=is_dir))
        return filepaths_to_delete_as_dict

    else:
        return filepaths_to_delete
Example #6
def make_sure_archive_folder(filepath):
    # given a file path, create its corresponding .Archive folder, mainly used for version management
    folder_path = same_slash(os.path.dirname(filepath))
    archive_path = join(folder_path, '.Archive')
    if not os.path.isdir(archive_path):
        os.makedirs(archive_path)
    return archive_path
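For instance (hypothetical path), make_sure_archive_folder('/site/notes/post.md') creates and returns '/site/notes/.Archive'; get_file_versions_folder in Example #11 then nests a per-file subfolder ('/site/notes/.Archive/post.md') inside it.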
Example #7
def sync_loop_local_filesystem(root_path,
                               app_name,
                               check_md5=True,
                               extra_should_sync_func=None):
    root_path = same_slash(root_path)
    if not os.path.isdir(root_path):  # root folder does not exist, nothing to do
        return []
    file_paths = []
    for parent, folders, files in os.walk(root_path):
        if is_a_hidden_path(parent):
            continue
        elif not is_real(parent):  # skip links, they are not handled
            continue
        for fs in [files, folders]:
            for filename in fs:
                filepath = join(parent, filename)
                # check whether it is already recorded in the local database
                if not should_sync(
                        filepath,
                        root_path,
                        app_name,
                        check_md5,
                        extra_should_sync_func=extra_should_sync_func):
                    continue
                file_paths.append(filepath)
    return file_paths
Example #8
    def __init__(self, server_node, root, private_key=None, should_encrypt_file=True, files_info_filepath=None,
                app_name_for_sync='farbox_bucket', should_sync_file_func=None, auto_clean_bucket=True):
        self.server_node = server_node
        self.root = root
        self.private_key = private_key
        self.should_encrypt_file = should_encrypt_file
        self.auto_clean_bucket = auto_clean_bucket

        self.files_info_filepath = files_info_filepath or join(root, '.files_info.json')
        files_info = load_json_file(self.files_info_filepath)
        if not isinstance(files_info, dict):
            files_info = {}
        ipfs_files = files_info.setdefault('files', {})
        if not isinstance(ipfs_files, dict):
            ipfs_files = {}
            files_info['files'] = ipfs_files

        self.ipfs_files = ipfs_files
        self.files_info = files_info

        self.app_name_for_sync = app_name_for_sync or 'farbox_bucket'

        self.files_info_on_server = {}  # the files' info from remote server side

        # pass relative-path to this func, return True/False to sync or not
        self.should_sync_file_func = should_sync_file_func
Example #9
def clear_sync_meta_data(root, app_name='farbox_bucket'):
    data_folder = get_sync_data_folder(root, app_name)
    delete_file(data_folder, to_trash=False)
    delete_sync_cursor(root, app_name)  # remove the cursor meta data
    delete_sync_log(root, app_name)  # remove the sync log
    files_info_file = join(root, '.files_info.json')
    delete_file(files_info_file, to_trash=True)
Example #10
def default_set_cursor_func(root, cursor):
    cursor_file = join(root, ".farbox.cursor")
    try:
        with open(cursor_file, "wb") as f:
            f.write(to_bytes(cursor))  # binary mode, make sure we write bytes
            return True
    except:
        return False
Example #11
def get_file_versions_folder(filepath):
    filepath = same_slash(filepath)
    if not os.path.isfile(filepath):  # source file does not exist, nothing to do
        return  # ignore
    archive_path = make_sure_archive_folder(filepath)  # ensure the .Archive folder exists
    filename = os.path.split(filepath)[-1]
    versions_folder = join(archive_path, filename)
    return versions_folder
Example #12
def default_get_cursor_func(root):
    if not os.path.isdir(root):
        return
    cursor_file = join(root, ".farbox.cursor")
    if os.path.isfile(cursor_file):
        return read_file(cursor_file)
    else:
        return None
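A hedged round-trip sketch for the two cursor helpers above (assuming read_file returns the raw file contents, and that to_bytes handles the str-to-bytes conversion on write):

# persist the latest cursor, then read it back
if default_set_cursor_func('/site', 'cursor-abc123'):
    print(default_get_cursor_func('/site'))  # contents of /site/.farbox.cursor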
Example #13
    def do_record_sync_log(self, log):
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log = to_bytes('%s %s\n\n' % (now, log))
        sync_log_filepath = join(self.root, '.sync/%s_sync.log' % self.app_name_for_sync)
        try:
            make_sure_path(sync_log_filepath)
            with open(sync_log_filepath, 'ab') as f:  # append as binary, log is bytes
                f.write(log)
        except:
            pass
Example #14
def store_sync_from_log(root, log):
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log = smart_str('%s %s\n\n' % (now, log))
    sync_log_filepath = join(root, '.sync/farbox_sync_from.log')
    try:
        make_sure_path(sync_log_filepath)
        with open(sync_log_filepath, 'a') as f:
            f.write(log)
    except:
        pass
Example #15
    def sync_for_deleted_files(self):
        # handle files that were deleted locally
        synced = False
        filepaths_to_delete_data = sync_find_files_to_delete(
            self.root, app_name=self.app_name_for_sync, as_dict=True)
        for filepath_to_delete_data in filepaths_to_delete_data:
            filepath_to_delete = filepath_to_delete_data['filepath']
            is_dir = filepath_to_delete_data.get('is_dir', False)
            relative_path = get_relative_path(filepath_to_delete,
                                              root=self.root)
            ipfs_to_delete = self.ipfs_files.pop(relative_path, None)
            if isinstance(ipfs_to_delete, dict):
                ipfs_hash_to_delete = ipfs_to_delete.get('hash')
            else:
                ipfs_hash_to_delete = ipfs_to_delete
            self.remove_file_from_ipfs(ipfs_hash_to_delete)

            # is_deleted=True, send md5 value as version
            md5_value = filepath_to_delete_data.get('md5')

            compiler_sync_worker = FarBoxSyncCompilerWorker(
                server_node=self.server_node,
                root=self.root,
                filepath=filepath_to_delete,
                is_deleted=True,
                is_dir=is_dir,
                private_key=self.private_key,
                should_encrypt_file=self.should_encrypt_file,
                ipfs_key=ipfs_hash_to_delete,
                version=md5_value,
                auto_clean_bucket=self.auto_clean_bucket,
                files_info=self.files_info)
            sync_status = compiler_sync_worker.sync()
            self.record_sync_log(filepath=filepath_to_delete,
                                 sync_status=sync_status,
                                 is_deleted=True)
            if sync_status and sync_status.get('code') == 200:
                synced = True
                # at last, mark status as synced
                after_sync_deleted(filepath_to_delete,
                                   root=self.root,
                                   app_name=self.app_name_for_sync)

        # files on the server but not on the local side: clean the configs_for_files
        # should run after self.sync_for_updated_files, to get self.files_info_on_server
        files_info_on_server = get_value_from_data(self.files_info_on_server,
                                                   'message.files') or {}
        for relative_path in files_info_on_server.keys():
            abs_filepath = join(self.root, relative_path)
            if not os.path.isfile(abs_filepath):
                self.ipfs_files.pop(relative_path, None)
                synced = True

        return synced
Example #16
def get_sync_data(filepath, root, app_name):
    # get the synced information for a filepath
    # based on its path, locate the file's sync-data file, which stores the
    # necessary info (md5 & synced_at) for deciding whether the file needs syncing
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    if os.path.isfile(data_path):
        try:
            with open(data_path) as f:
                data = json.loads(f.read())
                if data.get('is_relative'):
                    # relative path, convert to an absolute one
                    data['filepath'] = join(root, data['filepath'])
            if isinstance(data, dict):
                return data
        except:
            pass
    return {}  # final
Example #17
def sync_site_folder_simply(
    node,
    root,
    private_key,
    should_encrypt_file=False,
    app_name_for_sync=None,
    print_log=True,
    exclude_rpath_func=None,
):
    if not node or not root or not private_key:
        return  # ignore
    if not os.path.isdir(root):
        return  # ignore
    if not is_valid_private_key(private_key):
        return  # ignore
    now = time.time()
    app_name_for_sync = app_name_for_sync or 'farbox_bucket'
    site_folder_status_config_filepath = join(
        root, '.%s_site_folder_status.json' % app_name_for_sync)
    site_folder_status = load_json_file(
        site_folder_status_config_filepath) or {}
    bucket = get_bucket_by_private_key(private_key)
    old_bucket = site_folder_status.get('bucket')
    old_node = site_folder_status.get('node')
    if bucket != old_bucket or node != old_node:
        # bucket or node changed, reset the sync
        clear_sync_meta_data(root=root, app_name=app_name_for_sync)
        site_folder_status['bucket'] = bucket
        site_folder_status['node'] = node
        # also reset the cached config md5 values
        for key in list(site_folder_status.keys()):
            if key.endswith('_md5'):
                site_folder_status.pop(key, None)

    # dump_template first
    template_folder = get_path_with_dot_allowed(root, 'template')
    if os.path.isdir(template_folder):
        pages_data = get_pages_data(template_folder)
        current_pages_md5 = get_md5(json_dumps(pages_data, indent=4))
        old_pages_md5 = site_folder_status.get('pages_md5')
        if current_pages_md5 != old_pages_md5:  # the template changed
            old_pages_data = site_folder_status.get('pages') or {}
            sync_status = dump_pages(
                node=node,
                private_key=private_key,
                pages_dir=template_folder,
                old_pages_data=old_pages_data,
            )
            sync_status_code = sync_status.get('code')
            if sync_status_code != 200:
                if print_log:
                    print(sync_status.get('message'))
                return
            else:
                # update pages_md5
                site_folder_status['pages_md5'] = current_pages_md5
                site_folder_status['pages'] = pages_data
                if print_log:
                    print('template is changed and synced')

    # update files first
    files_changed = sync_folder_simply(node=node,
                                       root=root,
                                       private_key=private_key,
                                       should_encrypt_file=should_encrypt_file,
                                       app_name_for_sync=app_name_for_sync,
                                       exclude_rpath_func=exclude_rpath_func)

    # update configs
    for config_type in allowed_bucket_config_types:
        sync_bucket_config(site_folder_status,
                           root=root,
                           node=node,
                           private_key=private_key,
                           config_type=config_type,
                           print_log=print_log)

    # store the site_folder_status
    dump_json_file(filepath=site_folder_status_config_filepath,
                   data=site_folder_status)
Example #18
def create_file_version(filepath,
                        force=False,
                        min_time_diff=60,
                        history_max_versions=150):
    # force means: create a version unconditionally
    # copy the file into a dedicated folder; each copy becomes one "version"
    filepath = same_slash(filepath)

    # for Markdown file only
    if not _is_a_markdown_file(filepath):
        return

    if not os.path.exists(filepath):
        return  # ignore

    if os.path.isdir(filepath):
        return

    with open(filepath, 'rb') as f:
        raw_content = f.read()
        if not raw_content:
            return  # blank content, ignore

    raw_content = smart_str(raw_content)

    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d %H-%M-%S')

    version_folder = get_file_versions_folder(filepath)
    if not version_folder:
        return  # ignore

    version_file_path = join(version_folder,
                             now_str + os.path.splitext(filepath)[1])

    if not os.path.isdir(version_folder):
        os.makedirs(version_folder)

    versions_file_names = os.listdir(version_folder)
    versions_file_names = [
        name for name in versions_file_names
        if re.search(r'\d{4}-\d{1,2}-\d{1,2}', name)
    ]
    versions_file_names.sort()
    versions_file_names.reverse()  # reversed, so the newest version comes first

    file_size = os.path.getsize(filepath)
    now = time.time()

    if versions_file_names and file_size < 30 * 1024:  # extra checks for files under 30 KB
        last_version = versions_file_names[0]
        last_path = join(version_folder, last_version)
        last_mtime = os.path.getmtime(last_path)
        with open(last_path) as f:
            last_content = f.read()
        if last_content == raw_content:  # content unchanged, ignore
            return

        length_diff = abs(len(last_content) - len(raw_content))
        if (length_diff < 30 or 0 < (now - last_mtime) < min_time_diff) and not force:
            # the length changed by fewer than 30 characters, or the last version
            # is younger than min_time_diff seconds: skip unless forced
            return  # ignore
    elif versions_file_names:
        # skip if the previous version was created within min_time_diff seconds (unless forced)
        last_version = versions_file_names[0]
        last_path = join(version_folder, last_version)
        last_mtime = os.path.getmtime(last_path)
        if 0 < (now - last_mtime) < min_time_diff and not force:
            return  # ignore

    if file_size < 10 * 1024:  # under 10 KB
        max_versions = history_max_versions
        if max_versions < 0:
            max_versions = 0
    elif file_size < 100 * 1024:  # under 100 KB
        max_versions = 80
    elif file_size < 500 * 1024:  # under 500 KB
        max_versions = 50
    else:
        max_versions = 20

    if not max_versions:  # ignore, versioning is not allowed
        return

    # delete version files beyond the limit (the oldest ones, given the ordering above)
    for version_name_to_delete in versions_file_names[max_versions:]:
        file_path_to_delete = join(version_folder, version_name_to_delete)
        try:
            os.remove(file_path_to_delete)
        except IOError:
            pass

    try:
        with open(version_file_path, 'wb') as new_f:
            new_f.write(raw_content)
    except IOError:  # write failed
        return
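In short, the retention policy above keeps fewer versions as files grow: up to history_max_versions (default 150) under 10 KB, 80 under 100 KB, 50 under 500 KB, and 20 otherwise; since the version list is sorted newest-first, the slice past max_versions removes the oldest copies.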
Example #19
def get_sync_data_filepath(filepath, root, app_name):
    data_folder = make_sure_sync_data_folder(root, app_name)
    data_filename = get_sync_data_filename(filepath, root)
    data_filepath = join(data_folder, data_filename)
    return data_filepath
Example #20
def make_sure_sync_log_path(root, app_name):
    make_sure_sync_data_folder(root, app_name)
    sync_folder = join(root, '.sync')
    log_path = join(sync_folder, '%s.log' % app_name)
    return log_path
Example #21
def get_sync_log_path(root, app_name):
    sync_folder = join(root, '.sync')
    log_path = join(sync_folder, '%s.log' % app_name)
    return log_path
Example #22
def get_sync_data_folder(root, app_name):
    # the folder that stores the sync metadata
    data_path = join(root, '.sync/%s' % app_name)
    return data_path
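Taken together, the .sync helpers above imply this on-disk layout (illustrative, for root='/site' and app_name='farbox_bucket'):

# /site/.sync/farbox_bucket.log      <- get_sync_log_path / make_sure_sync_log_path
# /site/.sync/farbox_bucket.cursor   <- store_sync_cursor / get_sync_cursor
# /site/.sync/farbox_bucket/         <- get_sync_data_folder (per-file JSON records)
print(get_sync_data_folder('/site', 'farbox_bucket'))  # -> '/site/.sync/farbox_bucket'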
Example #23
def store_sync_cursor(root, cursor, app_name):
    make_sure_sync_data_folder(root, app_name)
    sync_folder = join(root, '.sync')
    cursor_path = join(sync_folder, '%s.cursor' % app_name)
    with open(cursor_path, 'w') as f:
        f.write(cursor)
Example #24
def sync_from_farbox(root,
                     private_key,
                     node,
                     get_cursor_func=None,
                     save_cursor_func=None,
                     before_file_sync_func=None,
                     after_file_sync_func=None,
                     per_page=30):
    get_cursor_func = get_cursor_func or partial(default_get_cursor_func, root)
    save_cursor_func = save_cursor_func or partial(default_set_cursor_func,
                                                   root)
    cursor = get_cursor_func()

    message = dict(per_page=per_page)
    if cursor:
        message["cursor"] = cursor

    records = send_message(node,
                           private_key,
                           action="show_records",
                           message=message)
    if not isinstance(records, (list, tuple)):
        if settings.DEBUG:
            print("error:records from node is not list/tuple type")
        return

    #if settings.DEBUG:
    #    print("get %s records from node" % len(records))

    last_cursor = None
    error_happened = False
    will_continue = False
    if len(records) == per_page:
        will_continue = True
    for record in records:
        if not isinstance(record, dict):
            continue
        record_id = record.get("_id")
        server_side_file_version = record.get("version")
        last_cursor = record_id
        path = record.get("path")
        if not record_id or not path:
            continue
        is_dir = record.get("is_dir")
        if is_dir:
            continue
        is_deleted = record.get("is_deleted")
        if is_deleted:
            # to avoid accidental deletions, ignore records flagged is_deleted
            continue
        abs_filepath = join(root, path)
        if server_side_file_version and os.path.isfile(abs_filepath):
            if get_md5_for_file(abs_filepath) == server_side_file_version:
                # an identical file already exists locally
                # if settings.DEBUG:
                #     print("has same file on server side for %s" % abs_filepath)
                continue

        # start downloading the file
        # 302 redirects are followed automatically, yielding the final 200 response
        response = send_message(node,
                                private_key,
                                action="download_file",
                                message=dict(record_id=record_id),
                                timeout=120,
                                return_response=True)
        if not response:
            error_happened = True
            continue
        if response.status_code == 404:  # just ignore a 404
            continue
        elif response.status_code not in [200, 201]:
            error_happened = True
            continue

        raw_file_content = response.content
        if not raw_file_content:
            continue

        # hook before saving, e.g. to store a file version
        if callable(before_file_sync_func):
            before_file_sync_func(abs_filepath)

        # if the file already exists, move it to the trash before saving, giving the
        # user a chance to undo; send2trash is not always reliable on Windows, so any
        # exception is simply ignored
        if os.path.isfile(abs_filepath) and send2trash is not None:
            try:
                send2trash.send2trash(abs_filepath)
            except:
                pass

        try:
            make_sure_path(abs_filepath)
            with open(abs_filepath, "wb") as f:
                f.write(smart_str(raw_file_content))
            if settings.DEBUG:
                print("downloaded %s" % abs_filepath)
        except:
            if settings.DEBUG:
                print_error()
            error_happened = True

        # hook after saving
        if callable(after_file_sync_func):
            after_file_sync_func(abs_filepath)

        # record the sync-from log
        store_sync_from_log(root, abs_filepath)

    if not error_happened and last_cursor:
        # only save the cursor when no error occurred, keeping synced data as consistent as possible
        cursor_saved = save_cursor_func(last_cursor)
        if cursor_saved and will_continue:
            # keep going: fetch the next page
            sync_from_farbox(root=root,
                             node=node,
                             private_key=private_key,
                             get_cursor_func=get_cursor_func,
                             save_cursor_func=save_cursor_func,
                             before_file_sync_func=before_file_sync_func,
                             after_file_sync_func=after_file_sync_func,
                             per_page=per_page)
        else:
            if cursor_saved:
                store_sync_from_log(root, "records updated")
            else:
                store_sync_from_log(root, "sync finished, no need to update")
            if settings.DEBUG:
                print("sync-from finished, %s records" % len(records))