def __try_save_next_bunch_of_file_states(self):
    """Check if we have multiple states to store to the DB, and do it."""
    assert not in_main_thread()

    with self.__file_states_ready_to_write_lock:
        all_states = self.__file_states_ready_to_write.values()
        self.__file_states_ready_to_write = {}

    # "all_states" contains tuples like (base_dir_id, state).
    # Group them by base_dir_id, and write multiple file states at once.
    if all_states:
        logger.debug('Writing %i file state(s) at once', len(all_states))

        grouped_by_base_dir = sorted_groupby(all_states, key=itemgetter(0))
        for base_dir_id, per_base_dir in grouped_by_base_dir:
            states_to_write = imap(itemgetter(1), per_base_dir)

            logger.debug('Writing states for base dir %r', base_dir_id)
            with db.RDB() as rdbw:
                HostQueries.HostFiles.add_file_states(
                    base_dir_id, states_to_write, rdbw)
            logger.debug('Wrote the states')
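# sorted_groupby() is a project-specific helper; as a minimal standalone
# sketch (an assumption, not the project's actual implementation), it
# behaves like itertools.groupby() over a pre-sorted iterable --
# groupby() alone only merges *adjacent* equal keys:

from itertools import groupby, imap
from operator import itemgetter


def sorted_groupby(iterable, key=None):
    """Sort by C{key}, then group by it (assumed semantics)."""
    return groupby(sorted(iterable, key=key), key=key)


# Usage mirroring the loop above: group (base_dir_id, state) tuples
# by base_dir_id, then strip the ids before writing.
all_states = [(2, 'state-b1'), (1, 'state-a1'), (2, 'state-b2')]
for base_dir_id, per_base_dir in sorted_groupby(all_states,
                                                key=itemgetter(0)):
    states_to_write = imap(itemgetter(1), per_base_dir)
    print base_dir_id, list(states_to_write)
# 1 ['state-a1']
# 2 ['state-b1', 'state-b2']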
def __backup_some_phys_files(self, base_dir, files, ugroup,
                             __do_start_backup=True):
    r"""Given some files, create a new dataset and start backing them up.

    >>> # ugroup = UserGroup(
    >>> #     uuid=UserGroupUUID('00000000-bbbb-0000-0000-000000000001'),
    >>> #     name='AlphA',
    >>> #     private=True,
    >>> #     enc_key='\x01\xe6\x13\xdab)\xd2n\xd6\xafTH\x03h\x02\x12'
    >>> #             '\x17D\x1a\xeb\x8b6\xc0\x9b\xa6\x7f\xcc\x06N\xcf'
    >>> #             '\x8b\xcd'
    >>> # )
    >>> # __backup_some_phys_files(
    >>> #     base_dir=u'/home/john/FreeBrie',
    >>> #     files=[
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f1.mp3',
    >>> #             size=13829879,
    >>> #             time_changed=datetime(2012, 11, 5, 12, 12, 41, 904430)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f2.avi',
    >>> #             size=3522710,
    >>> #             time_changed=datetime(2012, 11, 5, 12, 12, 41, 988433)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b',
    >>> #             rel_file=u'bbb',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15, 33, 42, 19808)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b/c',
    >>> #             rel_file=u'ccc',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15, 33, 41, 979807))
    >>> #     ],
    >>> #     ugroup=ugroup)

    @todo: complete the unit test, which is half-done!

    @param base_dir: the directory being backed up.
    @type base_dir: basestring

    @param files: the iterable over the files which should be backed up.
        Contains C{LocalPhysicalFileStateRel} objects.
        The caller should ensure that C{files} is non-empty!
    @type files: col.Iterable

    @type ugroup: UserGroup

    @return: the created dataset (if succeeded).
    @rtype: DatasetOnPhysicalFiles, NoneType
    """
    logger.debug('__backup_some_phys_files(%r, %r)', base_dir, ugroup)

    # Group files by rel_dir; then ignore base_dir,
    # keep only rel_dir, rel_file, size and time_changed.
    files_grouped_by_rel_dir = \
        ((RelVirtualFile(rel_dir=f.rel_dir,
                         filename=f.rel_file,
                         # If we can read the real stat, read it;
                         # otherwise we'll emulate it with fake_stat.
                         stat=coalesce(os_ex.safe_stat(  # real stat
                                           os.path.join(base_dir,
                                                        f.rel_path)),
                                       os_ex.fake_stat(  # deleted file
                                           st_mode=None,
                                           atime=f.time_changed,
                                           mtime=f.time_changed,
                                           ctime=f.time_changed,
                                           size=None)),
                         stat_getter=lambda f=f:
                                         os_ex.safe_stat(
                                             os.path.join(base_dir,
                                                          f.rel_path)),
                         file_getter=lambda f=f:
                                         open(os.path.join(base_dir,
                                                           f.rel_path),
                                              'rb'))
              for f in per_rel_dir)
         for rel_dir, per_rel_dir
             in sorted_groupby(files, attrgetter('rel_dir')))

    # Example:
    # files_grouped_by_rel_dir = [
    #     [
    #         RelVirtualFile(...),
    #         RelVirtualFile(...),
    #         RelVirtualFile(...)
    #     ],
    #     [
    #         RelVirtualFile(...)
    #     ],
    #     [
    #         RelVirtualFile(...)
    #     ]
    # ]

    _path_map = {base_dir: {'ifiles': files_grouped_by_rel_dir,
                            'stat': os_ex.safe_stat(base_dir)}}

    ds_uuid = DatasetUUID.safe_cast_uuid(gen_uuid())
    ds = self.select_paths_for_backup(ds_name='', ds_uuid=ds_uuid,
                                      ugroup_uuid=ugroup.uuid,
                                      sync=True, paths_map=_path_map)
    if ds is not None and __do_start_backup:
        self.start_backup(ds_uuid)

    return ds
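# A standalone sketch of the stat-or-fallback pattern used above. The
# definitions below are assumptions that mimic coalesce() and
# os_ex.safe_stat(), not the project's actual helpers:

import os


def coalesce(*args):
    """Return the first non-None argument, or None (assumed semantics)."""
    for arg in args:
        if arg is not None:
            return arg
    return None


def safe_stat(path):
    """Like os.stat(), but returns None for a missing file."""
    try:
        return os.stat(path)
    except OSError:
        return None


# An existing path yields its real stat; a deleted/missing one falls
# back to the second argument (the method above passes os_ex.fake_stat()).
result = coalesce(safe_stat('/no/such/file'), 'fallback-fake-stat')
assert result == 'fallback-fake-stat'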
                                   for ftu in files)  # isinstance(ftu, FileToUpload)

# Turn the original FileToUpload's to RelVirtualFile's.
_vfiles = (RelVirtualFile(rel_dir=ospath.dirname(ftu.rel_path),
                          filename=ospath.basename(ftu.rel_path),
                          stat=fstat,
                          # Bind the current fstat via a default argument,
                          # so each getter keeps its own value.
                          stat_getter=lambda fstat=fstat: fstat,
                          file_getter=ftu.file_getter)
               for ftu, fstat in _files_to_upload_with_stat)
# consists_of(_vfiles, RelVirtualFile)

# Group RelVirtualFile's by rel_dir.
_files_grouped_by_rel_dir = \
    ((rvf for rvf in per_rel_dir)
         for rel_dir, per_rel_dir in sorted_groupby(_vfiles,
                                                    attrgetter('rel_dir')))

paths_map = {base_dir: {'ifiles': _files_grouped_by_rel_dir,
                        'stat': fake_stat(isdir=True,
                                          atime=upload_time,
                                          mtime=upload_time,
                                          ctime=upload_time)}}

ds_uuid = DatasetUUID.safe_cast_uuid(gen_uuid())
dataset = DatasetOnVirtualFiles.from_paths(ds_name, ds_uuid, group_uuid,
                                           sync, paths_map, upload_time,
                                           cryptographer)
assert dataset is not None
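# Why "lambda fstat=fstat: fstat" rather than "lambda: fstat"? A closure
# created inside a generator expression captures the loop *variable*, not
# its value at that iteration, so every getter would end up returning the
# last pair's fstat once the generator is exhausted. A default argument
# freezes the value per item. Standalone demonstration (not project code):

getters_late = [lambda: n for n in (1, 2, 3)]
getters_bound = [lambda n=n: n for n in (1, 2, 3)]

print [g() for g in getters_late]   # [3, 3, 3] -- all share the final n
print [g() for g in getters_bound]  # [1, 2, 3] -- one value per getter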