Example #1
    def load_seafdir_2(self):
        seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.first_did)
        self.assertEqual(self.first_did, seafdir.obj_id)
        self.assertIn('create_moved_folder', seafdir.dirents.keys())
        self.assertIn('create_moved_file.md', seafdir.dirents.keys())
        self.assertTrue(seafdir.dirents.get('create_moved_file.md', None))
        self.assertEqual('045dfc08495b5c6cbc1a4dc347f5e2987fd809f4', seafdir.dirents['create_moved_file.md'].id)
        self.assertTrue(seafdir.dirents.get('create_moved_folder', None))
        self.assertEqual('05a6f0455d1f11ecfc202f5e218274b092fd3dbc', seafdir.dirents['create_moved_folder'].id)
        seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.second_did)
        self.assertIn('added_folder.md', seafdir.dirents.keys())
        self.assertEqual(self.second_did, seafdir.obj_id)
Example #2
    def load_seafdir_2(self):
        seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.first_did)
        self.assertEqual(self.first_did, seafdir.obj_id)
        self.assertIn('create_moved_folder', list(seafdir.dirents.keys()))
        self.assertIn('create_moved_file.md', list(seafdir.dirents.keys()))
        self.assertTrue(seafdir.dirents.get('create_moved_file.md', None))
        self.assertEqual('045dfc08495b5c6cbc1a4dc347f5e2987fd809f4',
                         seafdir.dirents['create_moved_file.md'].id)
        self.assertTrue(seafdir.dirents.get('create_moved_folder', None))
        self.assertEqual('05a6f0455d1f11ecfc202f5e218274b092fd3dbc',
                         seafdir.dirents['create_moved_folder'].id)
        seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.second_did)
        self.assertIn('added_folder.md', list(seafdir.dirents.keys()))
        self.assertEqual(self.second_did, seafdir.obj_id)
Example #3
def get_root_dir(repo):
    """
    Load the root SeafDir of the repo's latest commit
    """
    commits = seafile_api.get_commit_list(repo.id, 0, 1)
    commit = commit_mgr.load_commit(repo.id, repo.version, commits[0].id)
    return fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
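A minimal usage sketch (not part of the original example): the returned SeafDir exposes get_files_list() and get_subdirs_list(), as the test examples further down show; the repo object here is assumed to come from seafile_api.get_repo().
# Hypothetical usage of get_root_dir(); 'repo' is a placeholder repo object.
root = get_root_dir(repo)
for f in root.get_files_list():      # file dirents: name, id, size
    print(f.name, f.size)
for sd in root.get_subdirs_list():   # sub-directory dirents
    print(sd.name + '/')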
Example #4
def list_file_in_dir(repo_id, dirents, op_type):
    _dirents = copy.copy(dirents)
    files = []
    while True:
        try:
            d = _dirents.pop()
        except IndexError:
            break
        else:
            dir_obj = fs_mgr.load_seafdir(repo_id, 1, d.obj_id, ret_unicode=True)
            new_path = None

            file_list = dir_obj.get_files_list()
            for _file in file_list:
                if op_type in ['rename', 'move']:
                    new_path = os.path.join(d.new_path, _file.name)
                new_file = DiffEntry(os.path.join(d.path, _file.name), _file.id, _file.size, new_path)
                files.append(new_file)

            subdir_list = dir_obj.get_subdirs_list()
            for _dir in subdir_list:
                if op_type in ['rename', 'move']:
                    new_path = os.path.join(d.new_path, _dir.name)
                new_dir = DiffEntry(os.path.join(d.path, _dir.name), _dir.id, new_path=new_path)
                _dirents.append(new_dir)

    return files
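A hedged invocation sketch: list_file_in_dir() expects DiffEntry-like objects carrying path, obj_id and, for 'rename'/'move' operations, new_path. The entry and ids below are illustrative placeholders, not data from the example.
# Collect all files under a moved folder, rewriting their paths;
# repo_id is a placeholder.
moved = DiffEntry('/moved_folder', '05a6f0455d1f11ecfc202f5e218274b092fd3dbc',
                  new_path='/target/moved_folder')
for f in list_file_in_dir(repo_id, [moved], 'move'):
    print(f.path, '->', f.new_path)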
Example #5
    def get_member_list(self):
        member_list = []
        d = self.obj

        if d.version == 0:
            file_mtimes = []
            try:
                file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
            except Exception:
                raise DAVError(HTTP_INTERNAL_ERROR)

            mtimes = {}
            for entry in file_mtimes:
                mtimes[entry.file_name] = entry.last_modified
        for name, dent in d.dirents.items():
            member_path = posixpath.join(self.path, name)
            member_rel_path = posixpath.join(self.rel_path, name)

            if dent.is_dir():
                obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
                res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
            elif dent.is_file():
                obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
                res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
            else:
                continue

            if d.version == 1:
                obj.last_modified = dent.mtime
            else:
                obj.last_modified = mtimes[name]

            member_list.append(res)

        return member_list
Example #6
def copy_dirent(obj, repo, owner, path):
    """
    Copies files from object storage to the local filesystem
    obj - SeafDirent (file or dir) to be copied
    repo - repo containing obj
    owner - repo owner
    path - path in the local file system where obj should be saved
    """
    if obj.is_dir():
        dpath = path + os.sep + obj.name
        d = fs_mgr.load_seafdir(repo.id, repo.version, obj.id)
        for dname, dobj in list(d.dirents.items()):
            copy_dirent(dobj, repo, owner, dpath)
    elif obj.is_file():
        plist = [p for p in path.split(os.sep) if p]
        absdirpath = os.path.join(task._extracted_tmp_dir, *plist)
        if not os.path.exists(absdirpath):
            os.makedirs(absdirpath)
        seaf = fs_mgr.load_seafile(repo.id, repo.version, obj.id)
        fname = obj.name
        to_path = os.path.join(absdirpath, fname)
        write_seaf_to_path(seaf, to_path)
        logger.debug('File: {} copied to {}'.format(fname, to_path))
    else:
        logger.debug('Wrong seafile object: {}'.format(obj))
Example #7
    def getMemberList(self):
        member_list = []
        d = self.obj

        if d.version == 0:
            file_mtimes = []
            try:
                file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
            except Exception:
                raise DAVError(HTTP_INTERNAL_ERROR)

            mtimes = UTF8Dict()
            for entry in file_mtimes:
                mtimes[entry.file_name] = entry.last_modified
        for name, dent in d.dirents.items():
            member_path = utf8_path_join(self.path, name)
            member_rel_path = utf8_path_join(self.rel_path, name)

            if dent.is_dir():
                obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
                res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
            elif dent.is_file():
                obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
                res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
            else:
                continue

            if d.version == 1:
                obj.last_modified = dent.mtime
            else:
                obj.last_modified = mtimes[name]

            member_list.append(res)

        return member_list
Example #8
    def test_multi_backend_read_dir(self):
        try:
            obj_stores = commit_mgr.obj_stores
        except AttributeError:
            return

        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version,
                                        self.commit_id)
        dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version,
                                  commit.root_id)

        self.assertEqual(len(dir.get_files_list()), 3)
        self.assertEqual(len(dir.get_subdirs_list()), 2)

        dir_a = dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        self.assertEqual(file_b.size, 155067)

        # Test reading a file that spans more than one block
        file_c = dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'data', 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        chunk_size = file_c.size // 5
        for i in range(5):
            data += stream.read(chunk_size)
            self.assertEqual(len(data), (i + 1) * chunk_size)
            self.assertEqual(data, content[:len(data)])
        stream.close()

        self.assertEqual(data, content)

        file_x = dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #9
def get_file_by_path(repo_id, path):
    repo = seafile_api.get_repo(repo_id)
    dir = fs_mgr.load_seafdir(repo.id, repo.version,
                              get_commit_root_id(repo_id))
    parts = [p for p in path.split("/") if p]
    for part in parts:
        dir = dir.lookup(part)
    return dir
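dir.lookup() returns None for a missing entry (the tests above assert this), so the chained lookup can raise AttributeError on a bad path. A defensive variant, sketched under the same assumed helpers (seafile_api, fs_mgr, get_commit_root_id):
def get_file_by_path_safe(repo_id, path):
    # Stop at the first missing path component instead of raising.
    repo = seafile_api.get_repo(repo_id)
    obj = fs_mgr.load_seafdir(repo.id, repo.version, get_commit_root_id(repo_id))
    for part in [p for p in path.split("/") if p]:
        if obj is None:
            return None
        obj = obj.lookup(part)
    return obj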
Example #10
def get_all_files_by_path(dir, repo, path, dir_map):
    for dName, dObj in list(dir.dirents.items()):
        dPath = path + os.sep + dObj.name
        if dObj.is_dir():
            get_all_files_by_path(
                fs_mgr.load_seafdir(repo.id, repo.version, dObj.id), repo,
                dPath, dir_map)
        if dObj.is_file():
            dir_map.update({dPath: hash_file(dir.lookup(dObj.name))})
    return dir_map
Example #11
    def test_multi_backend_read_dir(self):
        try:
            obj_stores = commit_mgr.obj_stores
        except AttributeError:
            return

        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
        dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)

        self.assertEqual(len(dir.get_files_list()), 3)
        self.assertEqual(len(dir.get_subdirs_list()), 2)

        dir_a = dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        self.assertEqual(file_b.size, 155067)

        # Test reading a file that spans more than one block
        file_c = dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'data', 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        chunk_size = file_c.size // 5
        for i in range(5):
            data += stream.read(chunk_size)
            self.assertEqual(len(data), (i + 1) * chunk_size)
            self.assertEqual(data, content[:len(data)])
        stream.close()

        self.assertEqual(data, content)

        file_x = dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #12
    def test_read_dir(self):
        commit = commit_mgr.load_commit(self.repo_id, self.repo_version,
                                        self.commit_id)
        dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version,
                                  commit.root_id)

        import pprint
        pprint.pprint(dir.dirents)

        self.assertEqual(len(dir.get_files_list()), 3)
        self.assertEqual(len(dir.get_subdirs_list()), 2)

        dir_a = dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        self.assertEqual(file_b.size, 155067)

        # Test reading a file that spans more than one block
        file_c = dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        chunk_size = file_c.size // 5
        for i in range(5):
            data += stream.read(chunk_size)
            self.assertEqual(len(data), (i + 1) * chunk_size)
            self.assertEqual(data, content[:len(data)])
        stream.close()

        self.assertEqual(data, content)

        file_x = dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #13
def get_blocks(repo_id, version, root):
    queued_dirs = [root]
    blocks = set()
    while queued_dirs:
        cdir = fs_mgr.load_seafdir(repo_id, version, queued_dirs.pop())
        for dent in cdir.get_files_list():
            seafFile = fs_mgr.load_seafile(repo_id, version, dent.id)
            blocks.update(seafFile.blocks)

        for dent in cdir.get_subdirs_list():
            queued_dirs.append(dent.id)

    return blocks
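A usage sketch under assumptions: repo_id, version, and commit_id are placeholders, resolved the same way as in the other examples on this page.
# Count the unique blocks reachable from a commit's root directory.
commit = commit_mgr.load_commit(repo_id, version, commit_id)
blocks = get_blocks(repo_id, version, commit.root_id)
print('%d unique blocks' % len(blocks))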
Example #14
    def test_read_dir(self):
        commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
        dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)

        import pprint
        pprint.pprint(dir.dirents)

        self.assertEqual(len(dir.get_files_list()), 3)
        self.assertEqual(len(dir.get_subdirs_list()), 2)

        dir_a = dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        self.assertEqual(file_b.size, 155067)

        # Test reading a file that spans more than one block
        file_c = dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        chunk_size = file_c.size // 5
        for i in range(5):
            data += stream.read(chunk_size)
            self.assertEqual(len(data), (i + 1) * chunk_size)
            self.assertEqual(data, content[:len(data)])
        stream.close()

        self.assertEqual(data, content)

        file_x = dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #15
def get_md_json(repo_id):
    repo = seafile_api.get_repo(repo_id)
    commit_id = get_latest_commit_root_id(repo)

    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit_id)
    file = dir.lookup(ARCHIVE_METADATA_TARGET)
    if not file:
        md_dict = {}
        logger.info('archive-metadata.md file is not filled or missing.')
    else:
        md_dict = parse_markdown_doi(file.get_content().decode())
    if not md_dict.get('Author'):
        md_dict['Author'] = seafile_api.get_repo_owner(repo_id)
    if not md_dict.get('Title'):
        md_dict['Title'] = seafile_api.get_repo(repo_id).name
    if not md_dict.get('Year'):
        md_dict['Year'] = str(datetime.date.today().year)

    md_json = json.dumps(md_dict)
    return md_json
Example #16
def get_blocks(repo_id, fname, commit_id=None):
    """Print out blocks of file for repo and commit
        repo_id: repo id
        commit_id: commit id
    """
    repo = get_repo(repo_id)
    commits = seafile_api.get_commit_list(repo.id, 0, MAX_INT)

    print "commits:", [(c.id, c.ctime) for c in commits]

    commit_id = commit_id if commit_id else commits[0].id
    commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)

    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)

    file = dir.lookup(fname)
    print "File: %s, commit id: %s, root_id: %s" % (fname, commit_id,
                                                    commit.root_id)

    if file:
        print("blocks:", file.blocks)
    else:
        print("No file for this commit!")
Example #17
    def diff(self):
        scan_files = []
        new_dirs = []  # (path, dir_id)
        queued_dirs = []  # (path, dir_id1, dir_id2)

        if ZERO_OBJ_ID == self.root1:
            self.root1 = None
        if ZERO_OBJ_ID == self.root2:
            self.root2 = None

        if self.root1 == self.root2:
            return scan_files
        elif not self.root1:
            new_dirs.append(('/', self.root2))
        elif self.root2:
            queued_dirs.append(('/', self.root1, self.root2))

        while True:
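            # Walk both trees breadth-first: each queued entry pairs a path
            # with its old and new directory ids.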
            path = old_id = new_id = None
            try:
                path, old_id, new_id = queued_dirs.pop(0)
            except IndexError:
                break

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if new_dent and new_dent.type == dent.type:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        scan_files.append(
                            (make_path(path,
                                       dent.name), new_dent.id, new_dent.size))

            scan_files.extend([(make_path(path, dent.name), dent.id, dent.size)
                               for dent in dir2.get_files_list()])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if new_dent and new_dent.type == dent.type:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        queued_dirs.append(
                            (make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([(make_path(path, dent.name), dent.id)
                             for dent in dir2.get_subdirs_list()])

        while True:
            # Process newly added dirs and their sub-dirs; all files under
            # these dirs should be marked as added.
            path = obj_id = None
            try:
                path, obj_id = new_dirs.pop(0)
            except IndexError:
                break
            d = fs_mgr.load_seafdir(self.repo_id, self.version, obj_id)
            scan_files.extend([(make_path(path, dent.name), dent.id, dent.size)
                               for dent in d.get_files_list()])

            new_dirs.extend([(make_path(path, dent.name), dent.id)
                             for dent in d.get_subdirs_list()])

        return scan_files
Example #18
    def diff(self):
        added_files = []
        deleted_files = []
        deleted_dirs = []
        modified_files = []
        added_dirs = []
        renamed_files = []
        renamed_dirs = []
        moved_files = []
        moved_dirs = []

        new_dirs = []
        del_dirs = []
        queued_dirs = []  # (path, dir_id1, dir_id2)

        if self.root1 == self.root2:
            return (added_files, deleted_files, added_dirs, deleted_dirs,
                    modified_files, renamed_files, moved_files, renamed_dirs,
                    moved_dirs)
        else:
            queued_dirs.append(('/', self.root1, self.root2))

        while True:
            path = old_id = new_id = None
            try:
                path, old_id, new_id = queued_dirs.pop(0)
            except IndexError:
                break

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(
                        DiffEntry(make_path(path, dent.name), dent.id,
                                  dent.size))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        modified_files.append(
                            DiffEntry(make_path(path, dent.name), new_dent.id,
                                      new_dent.size))

            added_files.extend([
                DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                for dent in dir2.get_files_list()
            ])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    del_dirs.append(
                        DiffEntry(make_path(path, dent.name), dent.id))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        queued_dirs.append(
                            (make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([
                DiffEntry(make_path(path, dent.name), dent.id)
                for dent in dir2.get_subdirs_list()
            ])

        if not self.fold_dirs:
            while True:
                # Process newly added dirs and their sub-dirs; all files under
                # these dirs should be marked as added.
                try:
                    dir_dent = new_dirs.pop(0)
                    added_dirs.append(DiffEntry(dir_dent.path,
                                                dir_dent.obj_id))
                except IndexError:
                    break
                d = fs_mgr.load_seafdir(self.repo_id, self.version,
                                        dir_dent.obj_id)
                added_files.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id,
                              dent.size) for dent in d.get_files_list()
                ])

                new_dirs.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                    for dent in d.get_subdirs_list()
                ])

            while True:
                try:
                    dir_dent = del_dirs.pop(0)
                    deleted_dirs.append(
                        DiffEntry(dir_dent.path, dir_dent.obj_id))
                except IndexError:
                    break
                d = fs_mgr.load_seafdir(self.repo_id, self.version,
                                        dir_dent.obj_id)
                deleted_files.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id,
                              dent.size) for dent in d.get_files_list()
                ])

                del_dirs.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                    for dent in d.get_subdirs_list()
                ])

        else:
            deleted_dirs = del_dirs
            added_dirs = new_dirs

        if self.handle_rename:
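            # Pair deleted entries with added entries by object id to classify
            # renames (same parent dir) and moves (different parent dir).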
            ret_added_files = []
            ret_added_dirs = []

            # If an empty file or dir is generated from renaming or moving, just add it into both added_files
            # and deleted_files, because we can't know where it actually comes from.
            del_file_dict = {}
            for de in deleted_files:
                if de.obj_id != ZERO_OBJ_ID:
                    del_file_dict[de.obj_id] = de

            for de in added_files:
                if de.obj_id in del_file_dict:
                    del_de = del_file_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(
                            del_de.path):
                        # it's a rename operation if add and del are in the same dir
                        renamed_files.append(
                            DiffEntry(del_de.path, de.obj_id, de.size,
                                      de.path))
                    else:
                        moved_files.append(
                            DiffEntry(del_de.path, de.obj_id, de.size,
                                      de.path))
                    del del_file_dict[de.obj_id]
                else:
                    ret_added_files.append(de)

            del_dir_dict = {}
            for de in deleted_dirs:
                if de.obj_id != ZERO_OBJ_ID:
                    del_dir_dict[de.obj_id] = de

            for de in added_dirs:
                if de.obj_id in del_dir_dict:
                    del_de = del_dir_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(
                            del_de.path):
                        renamed_dirs.append(
                            DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    else:
                        moved_dirs.append(
                            DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    del del_dir_dict[de.obj_id]
                else:
                    ret_added_dirs.append(de)

            ret_deleted_files = list(del_file_dict.values())
            ret_deleted_dirs = list(del_dir_dict.values())
            for de in deleted_files:
                if de.obj_id == ZERO_OBJ_ID:
                    ret_deleted_files.append(de)
            for de in deleted_dirs:
                if de.obj_id == ZERO_OBJ_ID:
                    ret_deleted_dirs.append(de)
        else:
            ret_added_files = added_files
            ret_deleted_files = deleted_files
            ret_added_dirs = added_dirs
            ret_deleted_dirs = deleted_dirs

        return (ret_added_files, ret_deleted_files, ret_added_dirs,
                ret_deleted_dirs, modified_files, renamed_files, moved_files,
                renamed_dirs, moved_dirs)
Example #19
    def diff(self):
        added_files = []
        deleted_files = []
        deleted_dirs = []
        modified_files = []
        added_dirs = []
        renamed_files = []
        renamed_dirs = []
        moved_files = []
        moved_dirs = []

        new_dirs = []
        queued_dirs = []  # (path, dir_id1, dir_id2)

        if self.root1 == self.root2:
            return (added_files, deleted_files, added_dirs, deleted_dirs,
                    modified_files, renamed_files, moved_files, renamed_dirs,
                    moved_dirs)
        else:
            queued_dirs.append(('/', self.root1, self.root2))

        while True:
            path = old_id = new_id = None
            try:
                path, old_id, new_id = queued_dirs.pop(0)
            except IndexError:
                break

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(
                        DiffEntry(make_path(path, dent.name), dent.id,
                                  dent.size))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        modified_files.append(
                            DiffEntry(make_path(path, dent.name), new_dent.id,
                                      new_dent.size))

            added_files.extend([
                DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                for dent in dir2.get_files_list()
            ])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_dirs.append(
                        DiffEntry(make_path(path, dent.name), dent.id))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        queued_dirs.append(
                            (make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([
                DiffEntry(make_path(path, dent.name), dent.id)
                for dent in dir2.get_subdirs_list()
            ])

        if not self.fold_dirs:
            while True:
                # Process newly added dirs and their sub-dirs; all files under
                # these dirs should be marked as added.
                try:
                    dir_dent = new_dirs.pop(0)
                    added_dirs.append(DiffEntry(dir_dent.path,
                                                dir_dent.obj_id))
                except IndexError:
                    break
                d = fs_mgr.load_seafdir(self.repo_id, self.version,
                                        dir_dent.obj_id)
                added_files.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                    for dent in d.get_files_list()
                ])

                new_dirs.extend([
                    DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                    for dent in d.get_subdirs_list()
                ])
        else:
            added_dirs = new_dirs

        if self.handle_rename:
            ret_added_files = []
            ret_added_dirs = []

            del_file_dict = {de.obj_id: de for de in deleted_files}
            for de in added_files:
                if de.obj_id in del_file_dict:
                    del_de = del_file_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(
                            del_de.path):
                        # it's a rename operation if add and del are in the same dir
                        renamed_files.append(
                            DiffEntry(del_de.path, de.obj_id, de.size,
                                      de.path))
                    else:
                        moved_files.append(
                            DiffEntry(del_de.path, de.obj_id, de.size,
                                      de.path))
                    del del_file_dict[de.obj_id]
                else:
                    ret_added_files.append(de)

            del_dir_dict = {de.obj_id: de for de in deleted_dirs}
            for de in added_dirs:
                if de.obj_id in del_dir_dict:
                    del_de = del_dir_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(
                            del_de.path):
                        renamed_dirs.append(
                            DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    else:
                        moved_dirs.append(
                            DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    del del_dir_dict[de.obj_id]
                else:
                    ret_added_dirs.append(de)

            added_files = ret_added_files
            added_dirs = ret_added_dirs
            deleted_files = list(del_file_dict.values())
            deleted_dirs = list(del_dir_dict.values())

        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files, renamed_dirs,
                moved_dirs)
Example #20
def get_root_dir(repo, commit_root_id):
    """
    Load the root SeafDir for the given commit root id
    """
    return fs_mgr.load_seafdir(repo.id, repo.version, commit_root_id)
Example #21
def hash_library(repo_id, user_email):
    repo = seafile_api.get_repo(repo_id)
    dir = fs_mgr.load_seafdir(repo.id, repo.version,
                              get_commit_root_id(repo_id))
    file_map = get_all_files_by_path(dir, repo, '', {})
    return file_map
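A hedged sketch comparing the file maps of two libraries; the repo ids and email are placeholders, and the hash values come from the hash_file() helper assumed by Example #10.
# Find paths whose content hash differs between two libraries.
map_a = hash_library('repo-id-a', 'owner@example.com')
map_b = hash_library('repo-id-b', 'owner@example.com')
changed = [p for p in map_a if p in map_b and map_a[p] != map_b[p]]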
Example #22
    def diff(self, root2_time):  # noqa: C901
        added_files = []
        deleted_files = []
        deleted_dirs = []
        modified_files = []
        added_dirs = []

        new_dirs = []  # (path, dir_id)
        queued_dirs = []  # (path, dir_id1, dir_id2)

        if ZERO_OBJ_ID == self.root1:
            self.root1 = None
        if ZERO_OBJ_ID == self.root2:
            self.root2 = None

        if self.root1 == self.root2:
            return (added_files, deleted_files, added_dirs, deleted_dirs,
                    modified_files)
        elif not self.root1:
            new_dirs.append(('/', self.root2, root2_time, None))
        elif not self.root2:
            deleted_dirs.append('/')
        else:
            queued_dirs.append(('/', self.root1, self.root2))

        while True:
            path = old_id = new_id = None
            try:
                path, old_id, new_id = queued_dirs.pop(0)
            except IndexError:
                break

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(make_path(path, dent.name))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        modified_files.append(
                            (make_path(path, dent.name), new_dent.id,
                             new_dent.mtime, new_dent.size))

            added_files.extend([(make_path(path, dent.name), dent.id,
                                 dent.mtime, dent.size)
                                for dent in dir2.get_files_list()])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_dirs.append(make_path(path, dent.name))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        queued_dirs.append(
                            (make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([(make_path(path, dent.name), dent.id, dent.mtime,
                              dent.size) for dent in dir2.get_subdirs_list()])

        while True:
            # Process newly added dirs and their sub-dirs; all files under
            # these dirs should be marked as added.
            path = obj_id = None
            try:
                path, obj_id, mtime, size = new_dirs.pop(0)
                added_dirs.append((path, obj_id, mtime, size))
            except IndexError:
                break
            d = fs_mgr.load_seafdir(self.repo_id, self.version, obj_id)
            added_files.extend([(make_path(path, dent.name), dent.id,
                                 dent.mtime, dent.size)
                                for dent in d.get_files_list()])

            new_dirs.extend([(make_path(path, dent.name), dent.id, dent.mtime,
                              dent.size) for dent in d.get_subdirs_list()])

        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files)
Example #23
def generate_certificate(repo, commit):
    """ Generate Cared Data Certificate according to markdown file """

    # exit if the repo is encrypted
    if repo.encrypted:
        return False

    # exit if repo is system template
    if repo.rep_desc == TEMPLATE_DESC:
        return False

    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)

    # certificate already exists in root
    # file_names = [f.name for f in dir.get_files_list()]
    # if any(file_name.startswith(CDC_PDF_PREFIX) and file_name.endswith('.pdf') for file_name in file_names):
        # return False


    # get latest version of the ARCHIVE_METADATA_TARGET
    file = dir.lookup(ARCHIVE_METADATA_TARGET)

    # exit if no metadata file exists
    if not file:
        return False

    # check whether there is at least one creative dirent
    if not has_at_least_one_creative_dirent(dir):
        return False
    logging.info('Repo has creative dirents')


    try:
        db = get_db(KEEPER_DB_NAME)

        cur = db.cursor()

        # if is_certified(db, cur, repo.id):
            # return False

        owner = seafile_api.get_repo_owner(repo.id)
        logging.info("Certifying repo id: %s, name: %s, owner: %s ..." % (repo.id, repo.name, owner))
        cdc_dict = parse_markdown(file.get_content())
        if validate(cdc_dict):

            cdc_id = register_cdc_in_db(db, cur, repo.id, owner)

            logging.info("Generate CDC PDF...")
            cdc_pdf = CDC_PDF_PREFIX + cdc_id + ".pdf"
            # TODO: specify which url should be in CDC
            # as tmp decision: SERVICE_URL
            # repo_share_url = get_repo_share_url(repo.id, owner)
            repo_share_url = SERVICE_URL
            jars = ":".join(map(lambda e : MODULE_PATH + '/' + e, CDC_GENERATOR_JARS))
            args = [ "java", "-cp", jars, CDC_GENERATOR_MAIN_CLASS,
                    "-i", "\"" + cdc_id + "\"",
                    "-t", "\"" + cdc_dict['Title']  + "\"",
                    "-aa", "\"" + cdc_dict['Author']  + "\"",
                    "-d", "\"" + cdc_dict['Description']  + "\"",
                    "-c", "\"" + owner  + "\"",
                    "-u", "\"" + repo_share_url  + "\"",
                    cdc_pdf ]
            check_call(args)
            tmp_path = os.path.abspath(cdc_pdf)
            logging.info("PDF sucessfully generated")

            logging.info("Add " + cdc_pdf + " to the repo...")
            if UPDATE:
                seafile_api.put_file(repo.id, tmp_path, "/", cdc_pdf, SERVER_EMAIL, None)
                logging.info("Successfully updated")
            else:
                seafile_api.post_file(repo.id, tmp_path, "/", cdc_pdf, SERVER_EMAIL)
                logging.info("Successfully added")
            if not DEBUG:
                send_email(owner, {'USER_NAME': get_user_name(owner), 'PROJECT_NAME':repo.name, 'PROJECT_URL':get_repo_pivate_url(repo.id) })

                        #TODO: Send seafile notification
    except Exception as err:
        logging.info(str(err))
    finally:
        # other final stuff
        db.close()
        if 'tmp_path' in vars() and os.path.exists(tmp_path):
            os.remove(tmp_path)

    return True
Example #24
def get_metadata(repo_id, user_email, action_type):
    """ Read metadata from libray root folder"""

    repo = seafile_api.get_repo(repo_id)
    commit_id = get_latest_commit_root_id(repo)

    notification_type = MSG_TYPE_KEEPER_DOI_MSG if action_type == "assign DOI" else MSG_TYPE_KEEPER_ARCHIVING_MSG
    # exit if repo is system template
    if repo.rep_desc == TEMPLATE_DESC:
        msg = _('Cannot ' + action_type +
                ' if the library is the system template destination.')
        send_notification(msg, repo_id, notification_type, user_email)
        return {
            'error': msg,
        }

    if seafile_api.get_repo_history_limit(repo_id) > -1:
        msg = _('Cannot ' + action_type + ' because of the history setting.')
        send_notification(msg, repo_id, notification_type, user_email)
        return {
            'error': msg,
        }

    try:
        dir = fs_mgr.load_seafdir(repo.id, repo.version, commit_id)
        if not has_at_least_one_creative_dirent(dir):
            msg = _('Cannot ' + action_type +
                    ' if the library has no content.')
            send_notification(msg, repo_id, notification_type, user_email)
            return {
                'error': msg,
            }
        LOGGER.info('Repo has content')

        file = dir.lookup(ARCHIVE_METADATA_TARGET)
        if not file:
            msg = _('Cannot ' + action_type +
                    ' if archive-metadata.md file is not filled or missing.')
            send_notification(msg, repo_id, notification_type, user_email)
            return {
                'error': msg,
            }
        owner = seafile_api.get_repo_owner(repo.id)
        LOGGER.info(
            "Assigning DOI for repo id: {}, name: {}, owner: {} ...".format(
                repo.id, repo.name, owner))
        doi_dict = parse_markdown_doi(file.get_content().decode())
        ## Add hardcoded DOI metadata
        ## TODO: will be editable in next DOI releases
        doi_dict.update({
            'Publisher': PUBLISHER,
            'Resource Type': RESOURCE_TYPE
        })
        LOGGER.info(doi_dict)

        doi_msg = validate(doi_dict, repo_id, user_email)
        if len(doi_msg) > 0:
            return {
                'error':
                ' '.join(doi_msg) + ' ' +
                _('Please check out notifications for more details.'),
            }
        return doi_dict

    except Exception as err:
        LOGGER.error(str(err))
        raise err
Example #25
def get_catalog():

    catalog = []

    repos_all = seafile_api.get_repo_list(0, MAX_INT)
    #repos_all = [seafile_api.get_repo('a6d4ae75-b063-40bf-a3d9-dde74623bb2c')]

    for repo in repos_all:

        try:
            proj = {}
            proj["id"] = repo.id
            proj["name"] = repo.name
            email = get_repo_owner(repo.id)
            proj["owner"] = email
            user_name = get_user_name(email)
            if user_name != email:
                proj["owner_name"] = user_name 
            proj["in_progress"] = True

            commits = get_commits(repo.id, 0, 1)
            commit = get_commit(repo.id, repo.version, commits[0].id)
            dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
            file = dir.lookup(ARCHIVE_METADATA_TARGET)
            if file:
                md = parse_markdown(file.get_content())
                if md:
                    # Author
                    a = md.get("Author")
                    if a:
                        a_list = strip_uni(a.strip()).split('\n')
                        authors = []
                        for _ in a_list:
                            author = {}
                            aa = _.split(';')
                            author['name'] = aa[0]
                            if len(aa) > 1 and aa[1].strip():
                                author['affs'] = [x.strip() for x in aa[1].split('|')]
                                author['affs'] = [x for x in author['affs'] if x ]
                            authors.append(author)
                        if a:
                            proj["authors"] = authors

                    # Description
                    d = strip_uni(md.get("Description"))
                    if d:
                        proj["description"] = d

                    # Comments
                    c = strip_uni(md.get("Comments"))
                    if c:
                        proj["comments"] = c

                    #Title
                    t = strip_uni(md.get("Title"))
                    if t:
                        proj["title"] = t
                        del proj["in_progress"]
                    
                    proj["is_certified"] = is_certified_by_repo_id(repo.id)
            else:
                if DEBUG:
                    print("No %s for repo %s found" % (ARCHIVE_METADATA_TARGET, repo.name))
            catalog.append(proj)

        except Exception as err:
            msg = "repo_name: %s, id: %s, err: %s" % (repo.name, repo.id, str(err))
            logging.error(msg)
            if DEBUG:
                print(msg)

    return catalog
Example #26
    def diff(self):
        added_files = []
        deleted_files = []
        deleted_dirs = []
        modified_files = []
        added_dirs = []
        renamed_files = []
        renamed_dirs = []
        moved_files = []
        moved_dirs = []

        new_dirs = []
        del_dirs = []
        queued_dirs = [] # (path, dir_id1, dir_id2)

        if self.root1 == self.root2:
            return (added_files, deleted_files, added_dirs, deleted_dirs,
                    modified_files, renamed_files, moved_files,
                    renamed_dirs, moved_dirs)
        else:
            queued_dirs.append(('/', self.root1, self.root2))

        while True:
            path = old_id = new_id = None
            try:
                path, old_id, new_id = queued_dirs.pop(0)
            except IndexError:
                break

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(DiffEntry(make_path(path, dent.name), dent.id, dent.size))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        modified_files.append(DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

            added_files.extend([DiffEntry(make_path(path, dent.name), dent.id, dent.size) for dent in dir2.get_files_list()])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    del_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
                else:
                    dir2.remove_entry(dent.name)
                    if new_dent.id != dent.id:
                        queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([DiffEntry(make_path(path, dent.name), dent.id) for dent in dir2.get_subdirs_list()])

        if not self.fold_dirs:
            while True:
                # Process newly added dirs and their sub-dirs; all files under
                # these dirs should be marked as added.
                try:
                    dir_dent = new_dirs.pop(0)
                    added_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
                except IndexError:
                    break
                d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
                added_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size) for dent in d.get_files_list()])

                new_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id) for dent in d.get_subdirs_list()])

            while True:
                try:
                    dir_dent = del_dirs.pop(0)
                    deleted_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
                except IndexError:
                    break
                d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
                deleted_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size) for dent in d.get_files_list()])

                del_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id) for dent in d.get_subdirs_list()])

        else:
            deleted_dirs = del_dirs
            added_dirs = new_dirs

        if self.handle_rename:
            ret_added_files = []
            ret_added_dirs = []

            # If an empty file or dir is generated from renaming or moving, just add it into both added_files
            # and deleted_files, because we can't know where it actually comes from.
            del_file_dict = {}
            for de in deleted_files:
                if de.obj_id != ZERO_OBJ_ID:
                    del_file_dict[de.obj_id] = de

            for de in added_files:
                if de.obj_id in del_file_dict:
                    del_de = del_file_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                        # it's a rename operation if add and del are in the same dir
                        renamed_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                    else:
                        moved_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                    del del_file_dict[de.obj_id]
                else:
                    ret_added_files.append(de)

            del_dir_dict = {}
            for de in deleted_dirs:
                if de.obj_id != ZERO_OBJ_ID:
                    del_dir_dict[de.obj_id] = de

            for de in added_dirs:
                if de.obj_id in del_dir_dict:
                    del_de = del_dir_dict[de.obj_id]
                    if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                        renamed_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    else:
                        moved_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                    del del_dir_dict[de.obj_id]
                else:
                    ret_added_dirs.append(de)

            ret_deleted_files = list(del_file_dict.values())
            ret_deleted_dirs = list(del_dir_dict.values())
            for de in deleted_files:
                if de.obj_id == ZERO_OBJ_ID:
                    ret_deleted_files.append(de)
            for de in deleted_dirs:
                if de.obj_id == ZERO_OBJ_ID:
                    ret_deleted_dirs.append(de)
        else:
            ret_added_files = added_files
            ret_deleted_files = deleted_files
            ret_added_dirs = added_dirs
            ret_deleted_dirs = deleted_dirs

        return (ret_added_files, ret_deleted_files, ret_added_dirs, ret_deleted_dirs,
                modified_files, renamed_files, moved_files,
                renamed_dirs, moved_dirs)
Example #27
def get_repo_root_seafdir(repo):
    root_id = commit_mgr.get_commit_root_id(repo.id, repo.version,
                                            repo.head_cmmt_id)
    return fs_mgr.load_seafdir(repo.store_id, repo.version, root_id)
Example #28
def get_repo_root_seafdir(repo):
    root_id = commit_mgr.get_commit_root_id(repo.id, repo.version, repo.head_cmmt_id)
    return fs_mgr.load_seafdir(repo.store_id, repo.version, root_id)
Example #29
def generate_catalog_entry(repo):
    """
    Generate a catalog entry for the repo in the DB
    """
    reconnect_db()

    proj = {}

    try:
        proj["id"] = repo.id
        proj["name"] = repo.name
        email = get_repo_owner(repo.id)
        proj["owner"] = email
        user_name = get_user_name(email)
        if user_name != email:
            proj["owner_name"] = user_name
        proj["in_progress"] = True
        proj["modified"] = repo.last_modify

        commits = get_commits(repo.id, 0, 1)
        commit = get_commit(repo.id, repo.version, commits[0].id)
        dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
        file = dir.lookup(ARCHIVE_METADATA_TARGET)
        if file:
            md = file.get_content().decode('utf-8')
            md = parse_markdown(md)
            if md:
                # Author
                a = md.get("Author")
                if a:
                    a_list = a.split('\n')
                    authors = []
                    for _ in a_list:
                        author = {}
                        aa = _.split(';')
                        author['name'] = aa[0]
                        if len(aa) > 1 and aa[1].strip():
                            author['affs'] = [
                                x.strip() for x in aa[1].split('|')
                            ]
                            author['affs'] = [x for x in author['affs'] if x]
                        authors.append(author)
                    if a:
                        proj["authors"] = authors

                # Description
                d = md.get("Description")
                if d:
                    proj["description"] = d

                # Comments
                c = md.get("Comments")
                if c:
                    proj["comments"] = c

                # Title
                t = md.get("Title")
                if t:
                    proj["title"] = t
                    del proj["in_progress"]

                # Year
                y = md.get("Year")
                if y:
                    proj["year"] = y

                # Institute
                i = md.get("Institute")
                if i:
                    proj["institute"] = i

                proj["is_certified"] = is_certified_by_repo_id(repo.id)

        # add or update project metadata in DB
        c = Catalog.objects.add_or_update_by_repo_id(repo.id, email, proj,
                                                     repo.name)
        # Catalog_id
        proj["catalog_id"] = str(c.catalog_id)

    except Exception:
        msg = "repo_name: %s, id: %s" % (repo.name, repo.id)
        logging.error(msg)
        logging.error(traceback.format_exc())

    return proj