def test_load_seafile_2(self):
    seafile = fs_mgr.load_seafile(self.repo_id_2, 1, self.first_fid)
    self.assertEqual(1, len(seafile.blocks))
    self.assertTrue(len(seafile.blocks) > 0)
    self.assertEqual('2949afb5a9c351b9415b91c8f3d0d98991118c11', seafile.blocks[0])

    second_seafile = fs_mgr.load_seafile(self.repo_id_2, 1, self.second_fid)
    self.assertEqual(1, len(second_seafile.blocks))
    self.assertTrue(len(second_seafile.blocks) > 0)
    self.assertEqual('125f1e9dc9f3eca5a6819f9b4a2e17e53d7e2f78', second_seafile.blocks[0])
def test_load_seafile(self):
    seafile = fs_mgr.load_seafile(self.repo_id, 1, self.first_fid)
    self.assertEqual(1, len(seafile.blocks))
    self.assertTrue(len(seafile.blocks) > 0)
    self.assertEqual('2949afb5a9c351b9415b91c8f3d0d98991118c11', seafile.blocks[0])

    second_seafile = fs_mgr.load_seafile(self.repo_id, 1, self.second_fid)
    self.assertEqual(1, len(second_seafile.blocks))
    self.assertTrue(len(second_seafile.blocks) > 0)
    self.assertEqual('125f1e9dc9f3eca5a6819f9b4a2e17e53d7e2f78', second_seafile.blocks[0])
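# A minimal sketch of how the block ids asserted above map back to file
# content: fs_mgr.load_seafile() returns a SeafFile whose `blocks` attribute
# lists block ids, and each block can be fetched with block_mgr.load_block()
# (used the same way in scan_file_virus below). `read_file_bytes` is a
# hypothetical helper for illustration, not part of seafobj.
def read_file_bytes(repo_id, version, file_id):
    seafile = fs_mgr.load_seafile(repo_id, version, file_id)
    # Concatenate the blocks in order to reconstruct the file's bytes.
    return b''.join(block_mgr.load_block(repo_id, version, blk_id)
                    for blk_id in seafile.blocks)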
def getMemberList(self):
    member_list = []
    d = self.obj
    if d.version == 0:
        # v0 repos do not store mtimes in dirents; query them via seafile_api.
        file_mtimes = []
        try:
            file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
        except:
            raise DAVError(HTTP_INTERNAL_ERROR)

        mtimes = UTF8Dict()
        for entry in file_mtimes:
            mtimes[entry.file_name] = entry.last_modified

    for name, dent in d.dirents.iteritems():
        member_path = utf8_path_join(self.path, name)
        member_rel_path = utf8_path_join(self.rel_path, name)

        if dent.is_dir():
            obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
            res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
        elif dent.is_file():
            obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
            res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
        else:
            continue

        # v1 dirents carry their own mtime; v0 falls back to the queried map.
        if d.version == 1:
            obj.last_modified = dent.mtime
        else:
            obj.last_modified = mtimes[name]

        member_list.append(res)

    return member_list
def copy_dirent(obj, repo, owner, path):
    """Copy a file or directory from object storage to the local filesystem.

    obj   - SeafDir or SeafFile object to be copied
    repo  - repo that the object belongs to
    owner - owner of the repo
    path  - path in the local filesystem where the object should be saved
    """
    if obj.is_dir():
        dpath = path + os.sep + obj.name
        d = fs_mgr.load_seafdir(repo.id, repo.version, obj.id)
        for dname, dobj in list(d.dirents.items()):
            copy_dirent(dobj, repo, owner, dpath)
    elif obj.is_file():
        plist = [p for p in path.split(os.sep) if p]
        absdirpath = os.path.join(task._extracted_tmp_dir, *plist)
        if not os.path.exists(absdirpath):
            os.makedirs(absdirpath)
        seaf = fs_mgr.load_seafile(repo.id, repo.version, obj.id)
        fname = obj.name
        to_path = os.path.join(absdirpath, fname)
        write_seaf_to_path(seaf, to_path)
        logger.debug('File: {} copied to {}'.format(fname, to_path))
    else:
        logger.debug('Wrong seafile object: {}'.format(obj))
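# write_seaf_to_path() is referenced above but not defined in this file. A
# minimal sketch of what it presumably does, following the same block-by-block
# pattern used in scan_file_virus below; it assumes the SeafFile object carries
# its own store_id and version. Treat it as illustrative, not as the actual
# implementation.
def write_seaf_to_path(seaf, to_path):
    with open(to_path, 'wb') as fp:
        for blk_id in seaf.blocks:
            # Assumed attributes: seaf.store_id and seaf.version.
            fp.write(block_mgr.load_block(seaf.store_id, seaf.version, blk_id))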
def get_member_list(self):
    member_list = []
    d = self.obj
    if d.version == 0:
        file_mtimes = []
        try:
            file_mtimes = seafile_api.get_files_last_modified(self.repo.id, self.rel_path, -1)
        except:
            raise DAVError(HTTP_INTERNAL_ERROR)

        mtimes = {}
        for entry in file_mtimes:
            mtimes[entry.file_name] = entry.last_modified

    for name, dent in d.dirents.items():
        member_path = posixpath.join(self.path, name)
        member_rel_path = posixpath.join(self.rel_path, name)

        if dent.is_dir():
            obj = fs_mgr.load_seafdir(d.store_id, d.version, dent.id)
            res = SeafDirResource(member_path, self.repo, member_rel_path, obj, self.environ)
        elif dent.is_file():
            obj = fs_mgr.load_seafile(d.store_id, d.version, dent.id)
            res = SeafileResource(member_path, self.repo, member_rel_path, obj, self.environ)
        else:
            continue

        if d.version == 1:
            obj.last_modified = dent.mtime
        else:
            obj.last_modified = mtimes[name]

        member_list.append(res)

    return member_list
def get_blocks(repo_id, version, root):
    """Walk the fs tree from the given root dir and collect all block ids."""
    queued_dirs = [root]
    blocks = set()
    while queued_dirs:
        cdir = fs_mgr.load_seafdir(repo_id, version, queued_dirs.pop())
        for dent in cdir.get_files_list():
            seaf_file = fs_mgr.load_seafile(repo_id, version, dent.id)
            blocks.update(seaf_file.blocks)
        for dent in cdir.get_subdirs_list():
            queued_dirs.append(dent.id)
    return blocks
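# Example use of get_blocks(): starting from a commit's root dir, collect every
# block id reachable in that fs tree. `commit_mgr.load_commit` and `root_id`
# follow the usage in diff_and_scan_content below; `count_blocks` itself is a
# hypothetical helper, not part of the original code.
def count_blocks(repo_id, version, commit_id):
    commit = commit_mgr.load_commit(repo_id, version, commit_id)
    return len(get_blocks(repo_id, version, commit.root_id))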
def scan_file_virus(self, repo_id, file_id, file_path):
    # Initialize before the try block so the finally clause cannot raise
    # NameError if tempfile.mkstemp() itself fails.
    tfd = -1
    tpath = None
    try:
        tfd, tpath = tempfile.mkstemp()
        seafile = fs_mgr.load_seafile(repo_id, 1, file_id)
        # Assemble the file from its blocks into a temporary file for scanning.
        for blk_id in seafile.blocks:
            os.write(tfd, block_mgr.load_block(repo_id, 1, blk_id))

        with open(os.devnull, 'w') as devnull:
            ret_code = subprocess.call([self.settings.scan_cmd, tpath],
                                       stdout=devnull, stderr=devnull)

        return self.parse_scan_result(ret_code)
    except Exception as e:
        logger.warning('Virus scan for file %s encountered an error: %s.', file_path, e)
        return -1
    finally:
        if tfd > 0:
            os.close(tfd)
            os.unlink(tpath)
def scan_file_virus(self, repo_id, file_id, file_path):
    # Initialize before the try block so the finally clause cannot raise
    # NameError if tempfile.mkstemp() itself fails.
    tfd = -1
    tpath = None
    try:
        tfd, tpath = tempfile.mkstemp()
        seafile = fs_mgr.load_seafile(repo_id, 1, file_id)
        for blk_id in seafile.blocks:
            os.write(tfd, block_mgr.load_block(repo_id, 1, blk_id))

        # Send the scanner's output to a log file instead of discarding it.
        log_dir = os.environ.get('SEAFEVENTS_LOG_DIR', '')
        logfile = os.path.join(log_dir, 'virus_scan.log')
        with open(logfile, 'a') as fp:
            ret_code = subprocess.call([self.settings.scan_cmd, tpath],
                                       stdout=fp, stderr=fp)

        return self.parse_scan_result(ret_code)
    except Exception as e:
        logger.warning('Virus scan for file %s encountered an error: %s.', file_path, e)
        return -1
    finally:
        if tfd > 0:
            os.close(tfd)
            os.unlink(tpath)
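# parse_scan_result() is not shown in this file. Based on how its result is
# consumed, scan_file_virus() returns -1 on error and otherwise maps the
# scanner's exit code to a verdict. A hypothetical sketch, assuming the
# settings object carries lists of exit codes meaning "clean" and "infected"
# (both attribute names are assumptions, not the actual seafevents API):
def parse_scan_result(self, ret_code):
    if ret_code in self.settings.nonvir_codes:   # assumed attribute
        return 0                                 # clean
    if ret_code in self.settings.vir_codes:      # assumed attribute
        return 1                                 # virus found
    return -1                                    # unrecognized scanner exit code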
def diff_and_scan_content(self, task, client):
    repo_id = task.repo_id
    last_commit_id = task.last_commit_id
    new_commit_id = task.new_commit_id
    edb_session = appconfig.session_cls()

    # Repo not changed: only update the timestamp of the existing record.
    if last_commit_id == new_commit_id:
        q = edb_session.query(ContentScanRecord)
        q = q.filter(ContentScanRecord.repo_id == repo_id,
                     ContentScanRecord.commit_id == last_commit_id)
        q.update({"timestamp": self.dt})
        edb_session.commit()
        edb_session.close()
        return

    # Diff the two commits. Try fs version 1 first, then fall back to 0.
    version = 1
    new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
    if new_commit is None:
        version = 0
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
    if not new_commit:
        logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
        edb_session.close()
        return

    last_commit = None
    if last_commit_id:
        last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
        if not last_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
            edb_session.close()
            return

    new_root_id = new_commit.root_id
    last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

    differ = CommitDiffer(repo_id, version, last_root_id, new_root_id, True, False)
    (added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
     renamed_files, moved_files, renamed_dirs, moved_dirs) = differ.diff_to_unicode()

    # Handle renamed, moved and deleted files.
    q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id == repo_id)
    results = q.all()
    if results:
        path_pairs_to_rename = []
        paths_to_delete = []

        # renamed dirs
        for r_dir in renamed_dirs:
            r_path = r_dir.path + '/'
            prefix_len = len(r_path)
            for row in results:
                if r_path == row.path[:prefix_len]:
                    new_path = r_dir.new_path + '/' + row.path[prefix_len:]
                    path_pairs_to_rename.append((row.path, new_path))

        # moved dirs
        for m_dir in moved_dirs:
            m_path = m_dir.path + '/'
            prefix_len = len(m_path)
            for row in results:
                if m_path == row.path[:prefix_len]:
                    new_path = m_dir.new_path + '/' + row.path[prefix_len:]
                    path_pairs_to_rename.append((row.path, new_path))

        # renamed files
        for r_file in renamed_files:
            r_path = r_file.path
            for row in results:
                if r_path == row.path:
                    path_pairs_to_rename.append((row.path, r_file.new_path))

        # moved files
        for m_file in moved_files:
            m_path = m_file.path
            for row in results:
                if m_path == row.path:
                    path_pairs_to_rename.append((row.path, m_file.new_path))

        for old_path, new_path in path_pairs_to_rename:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id == repo_id,
                         ContentScanResult.path == old_path)
            q.update({"path": new_path})

        # deleted files
        for d_file in deleted_files:
            d_path = d_file.path
            for row in results:
                if d_path == row.path:
                    paths_to_delete.append(row.path)

        # We will scan modified_files and re-record later,
        # so delete previous records now.
        for m_file in modified_files:
            m_path = m_file.path
            for row in results:
                if m_path == row.path:
                    paths_to_delete.append(row.path)

        for path in paths_to_delete:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id == repo_id,
                         ContentScanResult.path == path)
            q.delete()

        edb_session.commit()

    # Scan added_files and modified_files via the third-party API.
    files_to_scan = []
    files_to_scan.extend(added_files)
    files_to_scan.extend(modified_files)
    a_count = 0
    scan_results = []
    for f in files_to_scan:
        if not self.should_scan_file(f.path, f.size):
            continue
        seafile_obj = fs_mgr.load_seafile(repo_id, 1, f.obj_id)
        content = seafile_obj.get_content()
        if not content:
            continue
        result = client.scan(content)
        if result and isinstance(result, dict):
            item = {"path": f.path, "detail": result}
            scan_results.append(item)
        else:
            logging.warning('Failed to scan %s:%s', repo_id, f.path)

    for item in scan_results:
        detail = json.dumps(item["detail"])
        new_record = ContentScanResult(repo_id, item["path"], appconfig.platform, detail)
        edb_session.add(new_record)
        a_count += 1
    if a_count >= 1:
        logging.info('Found %d new illegal files.', a_count)

    # Update ContentScanRecord.
    if last_commit_id:
        q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id == repo_id)
        q.update({"commit_id": new_commit_id, "timestamp": self.dt})
    else:
        new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
        edb_session.add(new_record)

    edb_session.commit()
    edb_session.close()
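# should_scan_file() is referenced above but not defined in this file. A
# minimal sketch of the kind of filter it presumably applies, assuming a size
# limit and a suffix whitelist on the settings object (both attribute names
# are assumptions, not the actual seafevents API):
def should_scan_file(self, fpath, fsize):
    if fsize > self.settings.size_limit:        # assumed attribute
        return False
    ext = os.path.splitext(fpath)[1].lower()
    return ext in self.settings.suffix_list     # assumed attribute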