def process_one_file(filepath, dir_path, filename, cover_mode):
    '''
    Process a regular file: record its metadata in the db and store its
    content as a rados object.

    If a record for the file already exists, it is replaced when
    cover_mode is truthy and left untouched otherwise; if no record
    exists, a new one is added.

    :param filepath: full on-disk path of the file
    :param dir_path: path of the directory containing the file
    :param filename: bare file name
    :param cover_mode: truthy -> overwrite an existing record, falsy -> skip
    '''
    global Mybucket
    dirId = getDirId(dir_path)  # id of the directory containing the file
    if isFileExists(filename, dirId):  # record already present in the db
        if cover_mode:
            with switch_collection(Mybucket, collection_name) as Mybucket:
                # Remove the old record(s) and their backing rados objects.
                for u in Mybucket.objects(Q(na=filename) & Q(did=dirId)):
                    object_to_delete = getObjectId(filename, dirId)  # object name
                    delete_object(str(object_to_delete))  # drop the rados object
                    u.delete()
                size = os.path.getsize(filepath)  # file size in bytes
                Mybucket(na=filename, fod=True, did=dirId, si=size).save()  # new record
                obj_name = getObjectId(filename, dirId)
                # Context manager guarantees the handle is closed even when
                # storeToRados raises (the original open()/close() pair
                # leaked the handle on error).
                with open(filepath, 'rb') as fo:
                    storeToRados(str(obj_name), fo, size)  # write content to rados
    else:
        size = os.path.getsize(filepath)  # file size in bytes
        with switch_collection(Mybucket, collection_name) as Mybucket:
            Mybucket(na=filename, fod=True, did=dirId, si=size).save()  # new record
            obj_name = getObjectId(filename, dirId)
            with open(filepath, 'rb') as fo:
                storeToRados(str(obj_name), fo, size)  # write content to rados
def process_one_path(path, cover_mode, system_mode):
    '''
    Recursively register a directory tree in the db.

    Adds metadata records for ``path`` and for every file and
    sub-directory below it.

    :param path: directory to process
    :param cover_mode: forwarded to process_one_file (overwrite vs skip)
    :param system_mode: forwarded to isSysOrHide to decide what to skip
    '''
    global recursive_flag
    global Mybucket
    recursive_flag = recursive_flag + 1  # one recursion level deeper
    if recursive_flag == 1:
        # Top-level call only: make sure the root path itself is registered.
        if not isDirExists(path):
            with switch_collection(Mybucket, collection_name) as Mybucket:
                Mybucket(na=path, fod=False).save()
    entries = os.listdir(path)  # every file and sub-directory under path
    for entry in entries:  # renamed from `file`, which shadows the builtin
        if entry in break_names:  # names configured to be skipped
            continue
        if path == '/':  # linux root: plain concat would yield '//entry'
            current_path = path + entry
        else:
            current_path = path + '/' + entry
        if isSysOrHide(system_mode, entry, current_path):  # system/hidden entry
            continue
        if os.path.isdir(current_path):
            if not isDirExists(current_path):
                if isDirExists(path):  # parent must already be registered
                    parentId = getDirId(path)
                    with switch_collection(Mybucket, collection_name) as Mybucket:
                        Mybucket(na=current_path, fod=False, did=parentId).save()
                else:
                    print("Error: no parent path")
                    sys.exit()
            # Recurse into the sub-directory.
            process_one_path(current_path, cover_mode, system_mode)
        elif os.path.isfile(current_path):
            process_one_file(current_path, path, entry, cover_mode)
        else:
            print("Warning: ", current_path, " is not a file or path")
    recursive_flag = recursive_flag - 1  # this level is done
def isDirExists(dir_path):
    '''
    Return True when a metadata record for ``dir_path`` already exists
    in the database, False otherwise.
    '''
    global Mybucket
    with switch_collection(Mybucket, collection_name) as Mybucket:
        # Any record whose name matches the directory path counts.
        return Mybucket.objects(na=dir_path).count() > 0
def isFileExists(filename, dirId):
    '''
    Return True when a metadata record for the file already exists in the
    database, False otherwise.

    The lookup matches on file name plus the id of the containing
    directory, restricted to records with sds=False.
    '''
    global Mybucket
    query = Q(na=filename) & Q(did=dirId) & Q(sds=False)
    with switch_collection(Mybucket, collection_name) as Mybucket:
        return Mybucket.objects(query).count() > 0
def getObjectId(filename, dirId):
    '''
    Return the id of the db record for the given file.

    Matches on file name plus directory id with sds=False; terminates the
    program when no matching record exists.
    '''
    global Mybucket
    with switch_collection(Mybucket, collection_name) as Mybucket:
        matches = Mybucket.objects(Q(na=filename) & Q(did=dirId) & Q(sds=False))
        # Guard clause: bail out when the file is unknown.
        if matches.count() == 0:
            print("Error: the file", filename, "do not exist")
            sys.exit()
        return matches[0].id
def getDirId(parent_path):
    '''
    Return the id of the db record for ``parent_path`` (used as the
    parent-directory id for its children).

    Matches on path with sds=False; terminates the program when no record
    exists.
    '''
    global Mybucket
    with switch_collection(Mybucket, collection_name) as Mybucket:
        hits = Mybucket.objects(Q(na=parent_path) & Q(sds=False))
        # Guard clause: bail out when the path is unknown.
        if hits.count() == 0:
            print("Error: the path", parent_path, "do not exist")
            sys.exit()
        return hits[0].id
def process_one_file(filepath, dir_path, filename, cover_mode):
    '''
    Process a regular file (metadata record only).

    An existing db record is replaced when cover_mode is truthy and kept
    as-is otherwise; when no record exists a new one is created.
    '''
    global Mybucket
    dirId = getDirId(dir_path)  # id of the directory holding the file
    already_known = isFileExists(filename, dirId)
    if already_known and cover_mode:
        with switch_collection(Mybucket, collection_name) as Mybucket:
            # Drop every stale record for this file, then re-add it.
            for stale in Mybucket.objects(Q(na=filename) & Q(did=dirId)):
                stale.delete()
            size = os.path.getsize(filepath)  # file size in bytes
            Mybucket(na=filename, fod=True, did=dirId, si=size).save()
    elif not already_known:
        size = os.path.getsize(filepath)  # file size in bytes
        with switch_collection(Mybucket, collection_name) as Mybucket:
            Mybucket(na=filename, fod=True, did=dirId, si=size).save()
    # already known and cover_mode falsy -> keep the existing record