def load_seafdir_2(self):
    seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.first_did)
    self.assertEqual(self.first_did, seafdir.obj_id)
    self.assertIn('create_moved_folder', seafdir.dirents.keys())
    self.assertIn('create_moved_file.md', seafdir.dirents.keys())
    self.assertTrue(seafdir.dirents.get('create_moved_file.md', None))
    self.assertEqual('045dfc08495b5c6cbc1a4dc347f5e2987fd809f4',
                     seafdir.dirents['create_moved_file.md'].id)
    self.assertTrue(seafdir.dirents.get('create_moved_folder', None))
    self.assertEqual('05a6f0455d1f11ecfc202f5e218274b092fd3dbc',
                     seafdir.dirents['create_moved_folder'].id)

    seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.second_did)
    self.assertIn('added_folder.md', seafdir.dirents.keys())
    self.assertEqual(self.second_did, seafdir.obj_id)

def load_seafdir_2(self):
    seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.first_did)
    self.assertEqual(self.first_did, seafdir.obj_id)
    self.assertIn('create_moved_folder', list(seafdir.dirents.keys()))
    self.assertIn('create_moved_file.md', list(seafdir.dirents.keys()))
    self.assertTrue(seafdir.dirents.get('create_moved_file.md', None))
    self.assertEqual('045dfc08495b5c6cbc1a4dc347f5e2987fd809f4',
                     seafdir.dirents['create_moved_file.md'].id)
    self.assertTrue(seafdir.dirents.get('create_moved_folder', None))
    self.assertEqual('05a6f0455d1f11ecfc202f5e218274b092fd3dbc',
                     seafdir.dirents['create_moved_folder'].id)

    seafdir = fs_mgr.load_seafdir(self.repo_id_2, 1, self.second_did)
    self.assertIn('added_folder.md', list(seafdir.dirents.keys()))
    self.assertEqual(self.second_did, seafdir.obj_id)

def get_root_dir(repo):
    """ Get root commit dir """
    commits = seafile_api.get_commit_list(repo.id, 0, 1)
    commit = commit_mgr.load_commit(repo.id, repo.version, commits[0].id)
    return fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)

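# Usage sketch (not part of the original sources): the SeafDir returned by
# get_root_dir() can be walked with get_files_list()/get_subdirs_list(), as the
# other snippets in this file do. "repo_id" here is a placeholder value.
def print_root_listing(repo_id):
    repo = seafile_api.get_repo(repo_id)
    root = get_root_dir(repo)
    for dent in root.get_files_list():
        print('file: %s (%s bytes)' % (dent.name, dent.size))
    for dent in root.get_subdirs_list():
        print('dir:  %s' % dent.name)
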
def list_file_in_dir(repo_id, dirents, op_type):
    _dirents = copy.copy(dirents)
    files = []
    while True:
        try:
            d = _dirents.pop()
        except IndexError:
            break
        else:
            dir_obj = fs_mgr.load_seafdir(repo_id, 1, d.obj_id, ret_unicode=True)
            new_path = None

            file_list = dir_obj.get_files_list()
            for _file in file_list:
                if op_type in ['rename', 'move']:
                    new_path = os.path.join(d.new_path, _file.name)
                new_file = DiffEntry(os.path.join(d.path, _file.name), _file.id,
                                     _file.size, new_path)
                files.append(new_file)

            subdir_list = dir_obj.get_subdirs_list()
            for _dir in subdir_list:
                if op_type in ['rename', 'move']:
                    new_path = os.path.join(d.new_path, _dir.name)
                new_dir = DiffEntry(os.path.join(d.path, _dir.name), _dir.id,
                                    new_path=new_path)
                _dirents.append(new_dir)

    return files

def get_member_list(self):
    member_list = []
    d = self.obj
    if d.version == 0:
        file_mtimes = []
        try:
            file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
        except:
            raise DAVError(HTTP_INTERNAL_ERROR)

        mtimes = {}
        for entry in file_mtimes:
            mtimes[entry.file_name] = entry.last_modified

    for name, dent in d.dirents.items():
        member_path = posixpath.join(self.path, name)
        member_rel_path = posixpath.join(self.rel_path, name)

        if dent.is_dir():
            obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
            res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
        elif dent.is_file():
            obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
            res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
        else:
            continue

        if d.version == 1:
            obj.last_modified = dent.mtime
        else:
            obj.last_modified = mtimes[name]

        member_list.append(res)

    return member_list

def copy_dirent(obj, repo, owner, path):
    """Copies files from object storage to the local filesystem.

    obj   - SeafDir or SeafFile object to copy
    repo  - repo the object belongs to
    owner - repo owner
    path  - path in the local filesystem where the object should be saved
    """
    if obj.is_dir():
        dpath = path + os.sep + obj.name
        d = fs_mgr.load_seafdir(repo.id, repo.version, obj.id)
        for dname, dobj in list(d.dirents.items()):
            copy_dirent(dobj, repo, owner, dpath)
    elif obj.is_file():
        plist = [p for p in path.split(os.sep) if p]
        absdirpath = os.path.join(task._extracted_tmp_dir, *plist)
        if not os.path.exists(absdirpath):
            os.makedirs(absdirpath)
        seaf = fs_mgr.load_seafile(repo.id, repo.version, obj.id)
        # fname = obj.name.decode('utf-8')
        fname = obj.name
        to_path = os.path.join(absdirpath, fname)
        write_seaf_to_path(seaf, to_path)
        logger.debug('File: {} copied to {}'.format(fname, to_path))
    else:
        logger.debug('Wrong seafile object: {}'.format(obj))

def getMemberList(self):
    member_list = []
    d = self.obj
    if d.version == 0:
        file_mtimes = []
        try:
            file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
        except:
            raise DAVError(HTTP_INTERNAL_ERROR)

        mtimes = UTF8Dict()
        for entry in file_mtimes:
            mtimes[entry.file_name] = entry.last_modified

    for name, dent in d.dirents.iteritems():
        member_path = utf8_path_join(self.path, name)
        member_rel_path = utf8_path_join(self.rel_path, name)

        if dent.is_dir():
            obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
            res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
        elif dent.is_file():
            obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
            res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
        else:
            continue

        if d.version == 1:
            obj.last_modified = dent.mtime
        else:
            obj.last_modified = mtimes[name]

        member_list.append(res)

    return member_list

def test_multi_backend_read_dir(self):
    try:
        obj_stores = commit_mgr.obj_stores
    except AttributeError:
        return

    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)

    self.assertEqual(len(dir.get_files_list()), 3)
    self.assertEqual(len(dir.get_subdirs_list()), 2)

    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)

    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)

    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)

    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEqual(file_a.size, 10)
    content = file_a.get_content()
    self.assertEqual(content, b'hello a.md')

    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEqual(file_b.size, 155067)

    # Test reading a file that spans more than one block
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEqual(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEqual(content, content_r)

    # Test stream read
    stream = file_c.get_stream()
    data = b''
    chunk_size = file_c.size // 5
    for i in range(5):
        data += stream.read(chunk_size)
        self.assertEqual(len(data), (i + 1) * chunk_size)
        self.assertEqual(data, content[:len(data)])
    stream.close()
    self.assertEqual(data, content)

    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)

def get_file_by_path(repo_id, path):
    repo = seafile_api.get_repo(repo_id)
    dir = fs_mgr.load_seafdir(repo.id, repo.version, get_commit_root_id(repo_id))
    paths = [_f for _f in path.split("/") if _f]
    for path in paths:
        dir = dir.lookup(path)
    return dir

def get_all_files_by_path(dir, repo, path, dir_map):
    for dName, dObj in list(dir.dirents.items()):
        dPath = path + os.sep + dObj.name
        if dObj.is_dir():
            get_all_files_by_path(
                fs_mgr.load_seafdir(repo.id, repo.version, dObj.id),
                repo, dPath, dir_map)
        if dObj.is_file():
            dir_map.update({dPath: hash_file(dir.lookup(dObj.name))})
    return dir_map

def test_multi_backend_read_dir(self):
    try:
        obj_stores = commit_mgr.obj_stores
    except AttributeError:
        return

    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)

    self.assertEquals(len(dir.get_files_list()), 3)
    self.assertEquals(len(dir.get_subdirs_list()), 2)

    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)

    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)

    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)

    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEquals(file_a.size, 10)
    content = file_a.get_content()
    self.assertEquals(content, 'hello a.md')

    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEquals(file_b.size, 155067)

    # Test reading a file that spans more than one block
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEquals(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEquals(content, content_r)

    # Test stream read
    stream = file_c.get_stream()
    data = ''
    chunk_size = file_c.size / 5
    for i in xrange(5):
        data += stream.read(chunk_size)
        self.assertEquals(len(data), (i + 1) * chunk_size)
        self.assertEquals(data, content[:len(data)])
    stream.close()
    self.assertEquals(data, content)

    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)

def test_read_dir(self):
    commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)

    import pprint
    pprint.pprint(dir.dirents)

    self.assertEquals(len(dir.get_files_list()), 3)
    self.assertEquals(len(dir.get_subdirs_list()), 2)

    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)

    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)

    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)

    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEquals(file_a.size, 10)
    content = file_a.get_content()
    self.assertEquals(content, 'hello a.md')

    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEquals(file_b.size, 155067)

    # Test reading a file that spans more than one block
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEquals(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEquals(content, content_r)

    # Test stream read
    stream = file_c.get_stream()
    data = ''
    chunk_size = file_c.size / 5
    for i in xrange(5):
        data += stream.read(chunk_size)
        self.assertEquals(len(data), (i + 1) * chunk_size)
        self.assertEquals(data, content[:len(data)])
    stream.close()
    self.assertEquals(data, content)

    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)

def get_blocks(repo_id, version, root):
    queued_dirs = [root]
    blocks = set()

    while queued_dirs:
        cdir = fs_mgr.load_seafdir(repo_id, version, queued_dirs.pop())
        for dent in cdir.get_files_list():
            seafFile = fs_mgr.load_seafile(repo_id, version, dent.id)
            blocks.update(seafFile.blocks)
        for dent in cdir.get_subdirs_list():
            queued_dirs.append(dent.id)

    return blocks

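# Usage sketch (not from the original sources): collect the block IDs reachable
# from a repo's head commit with the three-argument get_blocks() above.
# "repo" and "head_commit_id" are placeholder values; commit_mgr.load_commit and
# commit.root_id are used exactly as in the other snippets in this file.
def blocks_of_head_commit(repo, head_commit_id):
    commit = commit_mgr.load_commit(repo.id, repo.version, head_commit_id)
    return get_blocks(repo.id, repo.version, commit.root_id)
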
def test_read_dir(self):
    commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)

    import pprint; pprint.pprint(dir.dirents)

    self.assertEquals(len(dir.get_files_list()), 3)
    self.assertEquals(len(dir.get_subdirs_list()), 2)

    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)

    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)

    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)

    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEquals(file_a.size, 10)
    content = file_a.get_content()
    self.assertEquals(content, 'hello a.md')

    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEquals(file_b.size, 155067)

    # Test reading a file that spans more than one block
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEquals(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEquals(content, content_r)

    # Test stream read
    stream = file_c.get_stream()
    data = ''
    chunk_size = file_c.size / 5
    for i in xrange(5):
        data += stream.read(chunk_size)
        self.assertEquals(len(data), (i + 1) * chunk_size)
        self.assertEquals(data, content[:len(data)])
    stream.close()
    self.assertEquals(data, content)

    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)

def get_md_json(repo_id):
    repo = seafile_api.get_repo(repo_id)
    commit_id = get_latest_commit_root_id(repo)
    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit_id)
    file = dir.lookup(ARCHIVE_METADATA_TARGET)
    if not file:
        md_dict = {}
        logger.info('archive-metadata.md file is not filled or missing.')
    else:
        md_dict = parse_markdown_doi(file.get_content().decode())

    if not md_dict.get('Author'):
        md_dict['Author'] = seafile_api.get_repo_owner(repo_id)
    if not md_dict.get('Title'):
        md_dict['Title'] = seafile_api.get_repo(repo_id).name
    if not md_dict.get('Year'):
        md_dict['Year'] = str(datetime.date.today().year)

    md_json = json.dumps(md_dict)
    return md_json

def get_blocks(repo_id, fname, commit_id=None):
    """Print out blocks of a file for the given repo and commit.

    repo_id: repo id
    fname: file name to look up
    commit_id: commit id (defaults to the latest commit)
    """
    repo = get_repo(repo_id)
    commits = seafile_api.get_commit_list(repo.id, 0, MAX_INT)
    print "commits:", [(c.id, c.ctime) for c in commits]

    commit_id = commit_id if commit_id else commits[0].id
    commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)
    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
    file = dir.lookup(fname)

    print "File: %s, commit id: %s, root_id: %s" % (fname, commit_id, commit.root_id)
    if file:
        print "blocks: ", file.blocks
    else:
        print "No file for this commit!"

def diff(self):
    scan_files = []
    new_dirs = []     # (path, dir_id)
    queued_dirs = []  # (path, dir_id1, dir_id2)

    if ZERO_OBJ_ID == self.root1:
        self.root1 = None
    if ZERO_OBJ_ID == self.root2:
        self.root2 = None

    if self.root1 == self.root2:
        return scan_files
    elif not self.root1:
        new_dirs.append(('/', self.root2))
    elif self.root2:
        queued_dirs.append(('/', self.root1, self.root2))

    while True:
        path = old_id = new_id = None
        try:
            path, old_id, new_id = queued_dirs.pop(0)
        except IndexError:
            break

        dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
        dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

        for dent in dir1.get_files_list():
            new_dent = dir2.lookup_dent(dent.name)
            if new_dent and new_dent.type == dent.type:
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    scan_files.append((make_path(path, dent.name), new_dent.id, new_dent.size))

        scan_files.extend([(make_path(path, dent.name), dent.id, dent.size)
                           for dent in dir2.get_files_list()])

        for dent in dir1.get_subdirs_list():
            new_dent = dir2.lookup_dent(dent.name)
            if new_dent and new_dent.type == dent.type:
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

        new_dirs.extend([(make_path(path, dent.name), dent.id)
                         for dent in dir2.get_subdirs_list()])

    while True:
        # Process newly added dirs and its sub-dirs, all files under
        # these dirs should be marked as added.
        path = obj_id = None
        try:
            path, obj_id = new_dirs.pop(0)
        except IndexError:
            break

        d = fs_mgr.load_seafdir(self.repo_id, self.version, obj_id)
        scan_files.extend([(make_path(path, dent.name), dent.id, dent.size)
                           for dent in d.get_files_list()])
        new_dirs.extend([(make_path(path, dent.name), dent.id)
                         for dent in d.get_subdirs_list()])

    return scan_files

def diff(self):
    added_files = []
    deleted_files = []
    deleted_dirs = []
    modified_files = []
    added_dirs = []
    renamed_files = []
    renamed_dirs = []
    moved_files = []
    moved_dirs = []

    new_dirs = []
    del_dirs = []
    queued_dirs = []  # (path, dir_id1, dir_id2)

    if self.root1 == self.root2:
        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)
    else:
        queued_dirs.append(('/', self.root1, self.root2))

    while True:
        path = old_id = new_id = None
        try:
            path, old_id, new_id = queued_dirs.pop(0)
        except IndexError:
            break

        dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
        dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

        for dent in dir1.get_files_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_files.append(DiffEntry(make_path(path, dent.name), dent.id, dent.size))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    modified_files.append(DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

        added_files.extend([DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                            for dent in dir2.get_files_list()])

        for dent in dir1.get_subdirs_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                del_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

        new_dirs.extend([DiffEntry(make_path(path, dent.name), dent.id)
                         for dent in dir2.get_subdirs_list()])

    if not self.fold_dirs:
        while True:
            # Process newly added dirs and their sub-dirs; all files under
            # these dirs should be marked as added.
            try:
                dir_dent = new_dirs.pop(0)
                added_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            except IndexError:
                break

            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            added_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                                for dent in d.get_files_list()])
            new_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                             for dent in d.get_subdirs_list()])

        while True:
            try:
                dir_dent = del_dirs.pop(0)
                deleted_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            except IndexError:
                break

            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            deleted_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                                  for dent in d.get_files_list()])
            del_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                             for dent in d.get_subdirs_list()])
    else:
        deleted_dirs = del_dirs
        added_dirs = new_dirs

    if self.handle_rename:
        ret_added_files = []
        ret_added_dirs = []

        # If an empty file or dir results from a rename or move, add it to both
        # added_files and deleted_files, because we can't know where it actually came from.
        del_file_dict = {}
        for de in deleted_files:
            if de.obj_id != ZERO_OBJ_ID:
                del_file_dict[de.obj_id] = de

        for de in added_files:
            if de.obj_id in del_file_dict:
                del_de = del_file_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    # it's a rename operation if add and del are in the same dir
                    renamed_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                else:
                    moved_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                del del_file_dict[de.obj_id]
            else:
                ret_added_files.append(de)

        del_dir_dict = {}
        for de in deleted_dirs:
            if de.obj_id != ZERO_OBJ_ID:
                del_dir_dict[de.obj_id] = de

        for de in added_dirs:
            if de.obj_id in del_dir_dict:
                del_de = del_dir_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    renamed_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                else:
                    moved_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                del del_dir_dict[de.obj_id]
            else:
                ret_added_dirs.append(de)

        ret_deleted_files = list(del_file_dict.values())
        ret_deleted_dirs = list(del_dir_dict.values())

        for de in deleted_files:
            if de.obj_id == ZERO_OBJ_ID:
                ret_deleted_files.append(de)
        for de in deleted_dirs:
            if de.obj_id == ZERO_OBJ_ID:
                ret_deleted_dirs.append(de)
    else:
        ret_added_files = added_files
        ret_deleted_files = deleted_files
        ret_added_dirs = added_dirs
        ret_deleted_dirs = deleted_dirs

    return (ret_added_files, ret_deleted_files, ret_added_dirs, ret_deleted_dirs,
            modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)

def diff(self):
    added_files = []
    deleted_files = []
    deleted_dirs = []
    modified_files = []
    added_dirs = []
    renamed_files = []
    renamed_dirs = []
    moved_files = []
    moved_dirs = []

    new_dirs = []
    queued_dirs = []  # (path, dir_id1, dir_id2)

    if self.root1 == self.root2:
        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)
    else:
        queued_dirs.append(('/', self.root1, self.root2))

    while True:
        path = old_id = new_id = None
        try:
            path, old_id, new_id = queued_dirs.pop(0)
        except IndexError:
            break

        dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
        dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

        for dent in dir1.get_files_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_files.append(DiffEntry(make_path(path, dent.name), dent.id, dent.size))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    modified_files.append(DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

        added_files.extend([DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                            for dent in dir2.get_files_list()])

        for dent in dir1.get_subdirs_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

        new_dirs.extend([DiffEntry(make_path(path, dent.name), dent.id)
                         for dent in dir2.get_subdirs_list()])

    if not self.fold_dirs:
        while True:
            # Process newly added dirs and its sub-dirs, all files under
            # these dirs should be marked as added.
            try:
                dir_dent = new_dirs.pop(0)
                added_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            except IndexError:
                break

            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            added_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                                for dent in d.get_files_list()])
            added_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                               for dent in d.get_subdirs_list()])
    else:
        added_dirs = new_dirs

    if self.handle_rename:
        ret_added_files = []
        ret_added_dirs = []

        del_file_dict = {de.obj_id: de for de in deleted_files}
        for de in added_files:
            if de.obj_id in del_file_dict:
                del_de = del_file_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    # it's a rename operation if add and del are in the same dir
                    renamed_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                else:
                    moved_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                del del_file_dict[de.obj_id]
            else:
                ret_added_files.append(de)

        del_dir_dict = {de.obj_id: de for de in deleted_dirs}
        for de in added_dirs:
            if de.obj_id in del_dir_dict:
                del_de = del_dir_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    renamed_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                else:
                    moved_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                del del_dir_dict[de.obj_id]
            else:
                ret_added_dirs.append(de)

        added_files = ret_added_files
        added_dirs = ret_added_dirs
        deleted_files = del_file_dict.values()
        deleted_dirs = del_dir_dict.values()

    return (added_files, deleted_files, added_dirs, deleted_dirs,
            modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)

def get_root_dir(repo, commit_root_id):
    """ Get root commit dir """
    return fs_mgr.load_seafdir(repo.id, repo.version, commit_root_id)

def hash_library(repo_id, user_email):
    repo = seafile_api.get_repo(repo_id)
    dir = fs_mgr.load_seafdir(repo.id, repo.version, get_commit_root_id(repo_id))
    file_map = get_all_files_by_path(dir, repo, '', {})
    return file_map

def diff(self, root2_time):  # noqa: C901
    added_files = []
    deleted_files = []
    deleted_dirs = []
    modified_files = []
    added_dirs = []

    new_dirs = []     # (path, dir_id)
    queued_dirs = []  # (path, dir_id1, dir_id2)

    if ZERO_OBJ_ID == self.root1:
        self.root1 = None
    if ZERO_OBJ_ID == self.root2:
        self.root2 = None

    if self.root1 == self.root2:
        return (added_files, deleted_files, added_dirs, deleted_dirs, modified_files)
    elif not self.root1:
        new_dirs.append(('/', self.root2, root2_time, None))
    elif not self.root2:
        deleted_dirs.append('/')
    else:
        queued_dirs.append(('/', self.root1, self.root2))

    while True:
        path = old_id = new_id = None
        try:
            path, old_id, new_id = queued_dirs.pop(0)
        except IndexError:
            break

        dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
        dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

        for dent in dir1.get_files_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_files.append(make_path(path, dent.name))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    modified_files.append((make_path(path, dent.name), new_dent.id,
                                           new_dent.mtime, new_dent.size))

        added_files.extend([(make_path(path, dent.name), dent.id, dent.mtime, dent.size)
                            for dent in dir2.get_files_list()])

        for dent in dir1.get_subdirs_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_dirs.append(make_path(path, dent.name))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

        new_dirs.extend([(make_path(path, dent.name), dent.id, dent.mtime, dent.size)
                         for dent in dir2.get_subdirs_list()])

    while True:
        # Process newly added dirs and its sub-dirs, all files under
        # these dirs should be marked as added.
        path = obj_id = None
        try:
            path, obj_id, mtime, size = new_dirs.pop(0)
            added_dirs.append((path, obj_id, mtime, size))
        except IndexError:
            break

        d = fs_mgr.load_seafdir(self.repo_id, self.version, obj_id)
        added_files.extend([(make_path(path, dent.name), dent.id, dent.mtime, dent.size)
                            for dent in d.get_files_list()])
        new_dirs.extend([(make_path(path, dent.name), dent.id, dent.mtime, dent.size)
                         for dent in d.get_subdirs_list()])

    return (added_files, deleted_files, added_dirs, deleted_dirs, modified_files)

def generate_certificate(repo, commit):
    """ Generate Cared Data Certificate according to markdown file """

    # exit if repo is encrypted
    if repo.encrypted:
        return False

    # exit if repo is system template
    if repo.rep_desc == TEMPLATE_DESC:
        return False

    dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)

    # certificate already exists in root
    # file_names = [f.name for f in dir.get_files_list()]
    # if any(file_name.startswith(CDC_PDF_PREFIX) and file_name.endswith('.pdf') for file_name in file_names):
    #     return False

    # get latest version of the ARCHIVE_METADATA_TARGET
    file = dir.lookup(ARCHIVE_METADATA_TARGET)

    # exit if no metadata file exists
    if not file:
        return False

    # check whether there is at least one creative dirent
    if not has_at_least_one_creative_dirent(dir):
        return False
    logging.info('Repo has creative dirents')

    try:
        db = get_db(KEEPER_DB_NAME)
        cur = db.cursor()

        # if is_certified(db, cur, repo.id):
        #     return False

        owner = seafile_api.get_repo_owner(repo.id)
        logging.info("Certifying repo id: %s, name: %s, owner: %s ..." % (repo.id, repo.name, owner))
        cdc_dict = parse_markdown(file.get_content())
        if validate(cdc_dict):
            cdc_id = register_cdc_in_db(db, cur, repo.id, owner)

            logging.info("Generate CDC PDF...")
            cdc_pdf = CDC_PDF_PREFIX + cdc_id + ".pdf"
            # TODO: specify which url should be in CDC
            # as tmp decision: SERVICE_URL
            # repo_share_url = get_repo_share_url(repo.id, owner)
            repo_share_url = SERVICE_URL
            jars = ":".join(map(lambda e: MODULE_PATH + '/' + e, CDC_GENERATOR_JARS))
            args = [
                "java", "-cp", jars, CDC_GENERATOR_MAIN_CLASS,
                "-i", "\"" + cdc_id + "\"",
                "-t", "\"" + cdc_dict['Title'] + "\"",
                "-aa", "\"" + cdc_dict['Author'] + "\"",
                "-d", "\"" + cdc_dict['Description'] + "\"",
                "-c", "\"" + owner + "\"",
                "-u", "\"" + repo_share_url + "\"",
                cdc_pdf,
            ]
            check_call(args)
            tmp_path = os.path.abspath(cdc_pdf)
            logging.info("PDF successfully generated")

            logging.info("Add " + cdc_pdf + " to the repo...")
            if UPDATE:
                seafile_api.put_file(repo.id, tmp_path, "/", cdc_pdf, SERVER_EMAIL, None)
                logging.info("Successfully updated")
            else:
                seafile_api.post_file(repo.id, tmp_path, "/", cdc_pdf, SERVER_EMAIL)
                logging.info("Successfully added")
            if not DEBUG:
                send_email(owner, {'USER_NAME': get_user_name(owner),
                                   'PROJECT_NAME': repo.name,
                                   'PROJECT_URL': get_repo_pivate_url(repo.id)})
            # TODO: Send seafile notification
    except Exception as err:
        logging.info(str(err))
    finally:
        # other final stuff
        db.close()
        if 'tmp_path' in vars() and os.path.exists(tmp_path):
            os.remove(tmp_path)

    return True

def get_metadata(repo_id, user_email, action_type):
    """ Read metadata from library root folder """
    repo = seafile_api.get_repo(repo_id)
    commit_id = get_latest_commit_root_id(repo)

    notification_type = MSG_TYPE_KEEPER_DOI_MSG if action_type == "assign DOI" else MSG_TYPE_KEEPER_ARCHIVING_MSG

    # exit if repo is system template
    if repo.rep_desc == TEMPLATE_DESC:
        msg = _('Cannot ' + action_type + ' if the library is system template destination.')
        send_notification(msg, repo_id, notification_type, user_email)
        return {
            'error': msg,
        }

    if seafile_api.get_repo_history_limit(repo_id) > -1:
        msg = _('Cannot ' + action_type + ' because of the history setting.')
        send_notification(msg, repo_id, notification_type, user_email)
        return {
            'error': msg,
        }

    try:
        dir = fs_mgr.load_seafdir(repo.id, repo.version, commit_id)
        if not has_at_least_one_creative_dirent(dir):
            msg = _('Cannot ' + action_type + ' if the library has no content.')
            send_notification(msg, repo_id, notification_type, user_email)
            return {
                'error': msg,
            }
        LOGGER.info('Repo has content')

        file = dir.lookup(ARCHIVE_METADATA_TARGET)
        if not file:
            msg = _('Cannot ' + action_type + ' if archive-metadata.md file is not filled or missing.')
            send_notification(msg, repo_id, notification_type, user_email)
            return {
                'error': msg,
            }

        owner = seafile_api.get_repo_owner(repo.id)
        LOGGER.info("Assigning DOI for repo id: {}, name: {}, owner: {} ...".format(repo.id, repo.name, owner))
        doi_dict = parse_markdown_doi(file.get_content().decode())

        # Add hardcoded DOI metadata
        # TODO: will be editable in next DOI releases
        doi_dict.update({
            'Publisher': PUBLISHER,
            'Resource Type': RESOURCE_TYPE,
        })
        LOGGER.info(doi_dict)

        doi_msg = validate(doi_dict, repo_id, user_email)
        if len(doi_msg) > 0:
            return {
                'error': ' '.join(doi_msg) + ' ' + _('Please check out notifications for more details.'),
            }
        return doi_dict
    except Exception as err:
        LOGGER.error(str(err))
        raise err

def get_catalog():
    catalog = []

    repos_all = seafile_api.get_repo_list(0, MAX_INT)
    # repos_all = [seafile_api.get_repo('a6d4ae75-b063-40bf-a3d9-dde74623bb2c')]

    for repo in repos_all:
        try:
            proj = {}
            proj["id"] = repo.id
            proj["name"] = repo.name
            email = get_repo_owner(repo.id)
            proj["owner"] = email
            user_name = get_user_name(email)
            if user_name != email:
                proj["owner_name"] = user_name
            proj["in_progress"] = True

            commits = get_commits(repo.id, 0, 1)
            commit = get_commit(repo.id, repo.version, commits[0].id)
            dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
            file = dir.lookup(ARCHIVE_METADATA_TARGET)
            if file:
                md = parse_markdown(file.get_content())
                if md:
                    # Author
                    a = md.get("Author")
                    if a:
                        a_list = strip_uni(a.strip()).split('\n')
                        authors = []
                        for _ in a_list:
                            author = {}
                            aa = _.split(';')
                            author['name'] = aa[0]
                            if len(aa) > 1 and aa[1].strip():
                                author['affs'] = [x.strip() for x in aa[1].split('|')]
                                author['affs'] = [x for x in author['affs'] if x]
                            authors.append(author)
                        if a:
                            proj["authors"] = authors

                    # Description
                    d = strip_uni(md.get("Description"))
                    if d:
                        proj["description"] = d

                    # Comments
                    c = strip_uni(md.get("Comments"))
                    if c:
                        proj["comments"] = c

                    # Title
                    t = strip_uni(md.get("Title"))
                    if t:
                        proj["title"] = t
                        del proj["in_progress"]

                    proj["is_certified"] = is_certified_by_repo_id(repo.id)
            else:
                if DEBUG:
                    print "No %s for repo %s found" % (ARCHIVE_METADATA_TARGET, repo.name)

            catalog.append(proj)

        except Exception as err:
            msg = "repo_name: %s, id: %s, err: %s" % (repo.name, repo.id, str(err))
            logging.error(msg)
            if DEBUG:
                print msg

    return catalog

def diff(self):
    added_files = []
    deleted_files = []
    deleted_dirs = []
    modified_files = []
    added_dirs = []
    renamed_files = []
    renamed_dirs = []
    moved_files = []
    moved_dirs = []

    new_dirs = []
    del_dirs = []
    queued_dirs = []  # (path, dir_id1, dir_id2)

    if self.root1 == self.root2:
        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)
    else:
        queued_dirs.append(('/', self.root1, self.root2))

    while True:
        path = old_id = new_id = None
        try:
            path, old_id, new_id = queued_dirs.pop(0)
        except IndexError:
            break

        dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
        dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

        for dent in dir1.get_files_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                deleted_files.append(DiffEntry(make_path(path, dent.name), dent.id, dent.size))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    modified_files.append(DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

        added_files.extend([DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                            for dent in dir2.get_files_list()])

        for dent in dir1.get_subdirs_list():
            new_dent = dir2.lookup_dent(dent.name)
            if not new_dent or new_dent.type != dent.type:
                del_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
            else:
                dir2.remove_entry(dent.name)
                if new_dent.id == dent.id:
                    pass
                else:
                    queued_dirs.append((make_path(path, dent.name), dent.id, new_dent.id))

        new_dirs.extend([DiffEntry(make_path(path, dent.name), dent.id)
                         for dent in dir2.get_subdirs_list()])

    if not self.fold_dirs:
        while True:
            # Process newly added dirs and their sub-dirs; all files under
            # these dirs should be marked as added.
            try:
                dir_dent = new_dirs.pop(0)
                added_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            except IndexError:
                break

            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            added_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                                for dent in d.get_files_list()])
            new_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                             for dent in d.get_subdirs_list()])

        while True:
            try:
                dir_dent = del_dirs.pop(0)
                deleted_dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            except IndexError:
                break

            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            deleted_files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                                  for dent in d.get_files_list()])
            del_dirs.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                             for dent in d.get_subdirs_list()])
    else:
        deleted_dirs = del_dirs
        added_dirs = new_dirs

    if self.handle_rename:
        ret_added_files = []
        ret_added_dirs = []

        # If an empty file or dir results from a rename or move, add it to both
        # added_files and deleted_files, because we can't know where it actually came from.
        del_file_dict = {}
        for de in deleted_files:
            if de.obj_id != ZERO_OBJ_ID:
                del_file_dict[de.obj_id] = de

        for de in added_files:
            if de.obj_id in del_file_dict:
                del_de = del_file_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    # it's a rename operation if add and del are in the same dir
                    renamed_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                else:
                    moved_files.append(DiffEntry(del_de.path, de.obj_id, de.size, de.path))
                del del_file_dict[de.obj_id]
            else:
                ret_added_files.append(de)

        del_dir_dict = {}
        for de in deleted_dirs:
            if de.obj_id != ZERO_OBJ_ID:
                del_dir_dict[de.obj_id] = de

        for de in added_dirs:
            if de.obj_id in del_dir_dict:
                del_de = del_dir_dict[de.obj_id]
                if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                    renamed_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                else:
                    moved_dirs.append(DiffEntry(del_de.path, de.obj_id, -1, de.path))
                del del_dir_dict[de.obj_id]
            else:
                ret_added_dirs.append(de)

        ret_deleted_files = del_file_dict.values()
        ret_deleted_dirs = del_dir_dict.values()

        for de in deleted_files:
            if de.obj_id == ZERO_OBJ_ID:
                ret_deleted_files.append(de)
        for de in deleted_dirs:
            if de.obj_id == ZERO_OBJ_ID:
                ret_deleted_dirs.append(de)
    else:
        ret_added_files = added_files
        ret_deleted_files = deleted_files
        ret_added_dirs = added_dirs
        ret_deleted_dirs = deleted_dirs

    return (ret_added_files, ret_deleted_files, ret_added_dirs, ret_deleted_dirs,
            modified_files, renamed_files, moved_files, renamed_dirs, moved_dirs)

def get_repo_root_seafdir(repo):
    root_id = commit_mgr.get_commit_root_id(repo.id, repo.version, repo.head_cmmt_id)
    return fs_mgr.load_seafdir(repo.store_id, repo.version, root_id)

def generate_catalog_entry(repo):
    """ Generate catalog entry for the repo in the DB """
    reconnect_db()
    proj = {}

    try:
        proj["id"] = repo.id
        proj["name"] = repo.name
        email = get_repo_owner(repo.id)
        proj["owner"] = email
        user_name = get_user_name(email)
        if user_name != email:
            proj["owner_name"] = user_name
        proj["in_progress"] = True
        proj["modified"] = repo.last_modify

        commits = get_commits(repo.id, 0, 1)
        commit = get_commit(repo.id, repo.version, commits[0].id)
        dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
        file = dir.lookup(ARCHIVE_METADATA_TARGET)
        if file:
            md = file.get_content().decode('utf-8')
            md = parse_markdown(md)
            if md:
                # Author
                a = md.get("Author")
                if a:
                    a_list = a.split('\n')
                    authors = []
                    for _ in a_list:
                        author = {}
                        aa = _.split(';')
                        author['name'] = aa[0]
                        if len(aa) > 1 and aa[1].strip():
                            author['affs'] = [x.strip() for x in aa[1].split('|')]
                            author['affs'] = [x for x in author['affs'] if x]
                        authors.append(author)
                    if a:
                        proj["authors"] = authors

                # Description
                d = md.get("Description")
                if d:
                    proj["description"] = d

                # Comments
                c = md.get("Comments")
                if c:
                    proj["comments"] = c

                # Title
                t = md.get("Title")
                if t:
                    proj["title"] = t
                    del proj["in_progress"]

                # Year
                y = md.get("Year")
                if y:
                    proj["year"] = y

                # Institute
                i = md.get("Institute")
                if i:
                    proj["institute"] = i

                proj["is_certified"] = is_certified_by_repo_id(repo.id)

        # add or update project metadata in DB
        c = Catalog.objects.add_or_update_by_repo_id(repo.id, email, proj, repo.name)
        # Catalog_id
        proj["catalog_id"] = str(c.catalog_id)

    except Exception:
        msg = "repo_name: %s, id: %s" % (repo.name, repo.id)
        logging.error(msg)
        logging.error(traceback.format_exc())

    return proj