def __init__(self, name, digest,
             hosts=None, last_msg_read_ts=None, trusted=False,
             *args, **kwargs):
    """Constructor.

    @param name: the name of the user.
    @type name: basestring

    @param digest: the user digest.
    @type digest: str
    @precondition: len(digest) == 40

    @param hosts: the iterable over the C{Host} objects. It is copied
        into a new list; if omitted, an empty list is the fallback
        given to C{coalesce}.
    @type hosts: col.Iterable, NoneType

    @param last_msg_read_ts: the "last message read" timestamp;
        if omitted, C{datetime.min} is the fallback given
        to C{coalesce}.
    @type last_msg_read_ts: datetime, NoneType

    @param trusted: whether the hosts of the user are trusted.
    @type trusted: bool
    """
    super(User, self).__init__(*args, **kwargs)

    # Mandatory fields.
    self.name = name
    self.digest = digest

    # Optional constructor arguments; the hosts are always stored
    # as a fresh list, never as the iterable passed by the caller.
    host_source = coalesce(hosts, [])
    self.hosts = list(host_source)
    self.last_msg_read_ts = coalesce(last_msg_read_ts, datetime.min)

    # Optional document fields.
    self.trusted = trusted
def __init__(self,
             handle_transaction_before_create=None,
             handle_transaction_after_create=None,
             handle_transaction_destroy=None):
    """Constructor: remember the optional transaction lifecycle hooks.

    Every hook is stored in a private attribute. For each of them,
    a separate do-nothing callable accepting C{(tr, state)} is
    the fallback given to C{coalesce}.

    @param handle_transaction_before_create: the "before create" hook.
    @param handle_transaction_after_create: the "after create" hook.
    @param handle_transaction_destroy: the "destroy" hook.
    """
    def _new_noop_hook():
        """Create a fresh hook which ignores its arguments."""
        return lambda tr, state: None

    before_create = coalesce(handle_transaction_before_create,
                             _new_noop_hook())
    self.__handle_transaction_before_create = before_create

    after_create = coalesce(handle_transaction_after_create,
                            _new_noop_hook())
    self.__handle_transaction_after_create = after_create

    destroy = coalesce(handle_transaction_destroy,
                       _new_noop_hook())
    self.__handle_transaction_destroy = destroy
def __get_inbox_update_for_host(self):
    """
    Calculate which messages are missing from the host
    thus should be sent out to it in HEARTBEAT_ACK.

    The timestamps reported by the host in C{self.message.inbox_update}
    are combined with the ones kept in the FDB; the later of the resulting
    "read" and "sync" timestamps is passed to
    C{FDBQueries.UserMessages.get_new_messages}.

    @returns: dict with two keys: C{'last_msg_read_ts'} (the resulting
        "last read" timestamp, or C{None} if it equals C{datetime.min})
        and C{'messages'} (the list of the translated messages which
        have a non-empty body).
    """
    host = self.message.src
    inbox_state = self.message.inbox_update

    # The language of the host defines what translations to deliver.
    with db.RDB() as rdbw:
        locale = TrustedQueries.TrustedSettings.get_setting_or_default(
                     rdbw, Queries.Settings.LANGUAGE, host.uuid).lower()

    locales = get_wider_locales(locale)
    assert all(loc.lower() == loc for loc in locales), repr(locales)

    with ds.FDB() as fdbw:
        if tr_data_has_sync_ts(inbox_state):
            # The host has synced before: take the latest of what the host
            # reports and what the datastore remembers.
            stored_read_ts, stored_sync_ts = \
                FDBQueries.Users.get_last_msg_read_and_sync_ts(
                    host.user.name, host.uuid, fdbw)
            stored_read_ts = coalesce(stored_read_ts, datetime.min)
            stored_sync_ts = coalesce(stored_sync_ts, datetime.min)

            read_ts = max(inbox_state.get('last_msg_read_ts')
                              or datetime.min,
                          stored_read_ts)
            sync_ts = max(inbox_state.get('last_msg_sync_ts')
                              or datetime.min,
                          stored_sync_ts)
        else:
            # The host reported no sync timestamp: rely only on the stored
            # "read" timestamp of the user.
            read_ts = FDBQueries.Users.get_last_msg_read_ts(
                          host.user.name, fdbw) or datetime.min
            sync_ts = datetime.min

        messages = FDBQueries.UserMessages.get_new_messages(
                       host.user.name, max(read_ts, sync_ts),
                       locales=locales, fdbw=fdbw)

        # Eagerly build the list (it is stored in the message),
        # dropping the messages whose translation has an empty body.
        outgoing = [translated
                    for translated in (m.translate(locale=locale,
                                                   strict=False)
                                       for m in messages)
                    if translated.body]

        return {
            'last_msg_read_ts': None if read_ts == datetime.min
                                     else read_ts,
            'messages': outgoing,
        }
def __init__(self,
             chunk_uuids_to_replicate=None, chunk_uuids_to_restore=None,
             *args, **kwargs):
    """Constructor.

    Both arguments are copied into new lists; for an omitted one,
    an empty list is the fallback given to C{coalesce}.

    @type chunk_uuids_to_replicate: NoneType, col.Iterable
    @type chunk_uuids_to_restore: NoneType, col.Iterable
    """
    super(NotifyHostTransactionState_Node, self).__init__(*args, **kwargs)

    to_replicate = coalesce(chunk_uuids_to_replicate, [])
    self.chunk_uuids_to_replicate = list(to_replicate)

    to_restore = coalesce(chunk_uuids_to_restore, [])
    self.chunk_uuids_to_restore = list(to_restore)

    # Debug-time sanity check of the freshly built state.
    assert self.is_valid_bsonable(), repr(self)
def __init__(self,
             chunk_uuids_to_replicate=None, chunk_uuids_to_restore=None,
             *args, **kwargs):
    """Constructor.

    Both arguments are copied into new lists; for an omitted one,
    an empty list is the fallback given to C{coalesce}.

    @type chunk_uuids_to_replicate: NoneType, col.Iterable
    @type chunk_uuids_to_restore: NoneType, col.Iterable
    """
    super(NotifyHostTransactionState_Node, self).__init__(*args, **kwargs)

    to_replicate = coalesce(chunk_uuids_to_replicate, [])
    self.chunk_uuids_to_replicate = list(to_replicate)

    to_restore = coalesce(chunk_uuids_to_restore, [])
    self.chunk_uuids_to_restore = list(to_restore)

    # Debug-time sanity check of the freshly built state.
    assert self.is_valid_bsonable(), repr(self)
def wait_for_message_for_peer(self,
                              inh, prefer_msg_uuid, still_wait_checker,
                              d=None):
    """Implementation of interface from C{AbstractTransactionManager}.

    If a message for the peer is available right away, the deferred
    is fired with it; otherwise the deferred is queued in the per-host
    notification list of this peer.

    @type inh: AbstractInhabitant
    @type prefer_msg_uuid: MessageUUID, NoneType
    @type still_wait_checker: col.Callable
    @type d: defer.Deferred, NoneType

    @rtype: defer.Deferred

    @todo: C{still_wait_checker} is not used; instead, the user should
        probably delete the C{Deferred} from
        C{__outgoing_message_notifs_by_host_uuid}.
    """
    assert not in_main_thread()

    if d is None:
        d = defer.Deferred()

    with self.__outgoing_messages_lock:
        # The message with the preferred UUID, if we have any.
        # It is only used for the diagnostics below: delivering a message
        # with a particular UUID is not implemented.
        msg_by_uuid = \
            self.__outgoing_messages_by_uuid.get(prefer_msg_uuid, None)

        # The message for this particular peer, if we have any.
        reply_msg = self.deliver_message_for_peer(inh, prefer_msg_uuid)

        if reply_msg is None:
            # No message yet: queue the deferred for this peer,
            # so whoever adds a message directed to this host
            # will be able to fire it.
            waiters = self.__outgoing_message_notifs_by_host_uuid[inh.uuid]
            waiters.append(d)
        else:
            # For now, the message for the peer takes precedence over
            # the message with the preferred UUID (this might be wrong).
            # The comparison is done only here, where reply_msg
            # is definitely not None.
            preferred = coalesce(msg_by_uuid, reply_msg)
            if preferred != reply_msg:
                logger.warning("Could've deliver %r, but using %r instead",
                               msg_by_uuid, reply_msg)

            # We have an outgoing message for inh already!
            d.callback(reply_msg)

    return d
def __init__(self, deleted_datasets=None, *args, **kwargs):
    """Constructor.

    The argument is copied into a new list; if it is omitted,
    an empty list is the fallback given to C{coalesce}.

    @type deleted_datasets: NoneType, col.Iterable
    """
    parent = super(ExecuteOnNodeTransactionState_Node, self)
    parent.__init__(*args, **kwargs)

    datasets = coalesce(deleted_datasets, [])
    self.deleted_datasets = list(datasets)

    # Debug-time sanity check of the freshly built state.
    assert self.is_valid_bsonable(), repr(self)
def __init__(self, eventname_callback_map=None, *args, **kwargs):
    """Constructor.

    @param eventname_callback_map: event name -> callback,
        callback receives event as argument. The mapping is copied
        into a new C{dict}; if omitted, an empty mapping is the fallback
        given to C{coalesce}.
    @type eventname_callback_map: col.Mapping, NoneType
    """
    super(EventHandlerWithCallbacks, self).__init__(*args, **kwargs)

    callbacks = coalesce(eventname_callback_map, {})
    self.eventname_callback_map = dict(callbacks)
def __init__(self, listened_event_names=None, silent=True,
             *args, **kwargs):
    """Constructor.

    @param listened_event_names: names of events that would not
        be dropped. They are stored as a new C{set}; if omitted,
        an empty list is the fallback given to C{coalesce}.
    @type listened_event_names: col.Iterable, NoneType

    @param silent: should handler be silent about some errors?
    @type silent: bool
    """
    super(AbstractEventHandler, self).__init__(*args, **kwargs)

    self.silent = silent

    event_names = coalesce(listened_event_names, [])
    self.listened_event_names = set(event_names)
def on_begin(self):
    """
    Either send CHUNKS request to another Host (if C{self.is_outgoing()}),
    or receive CHUNKS request from another Host.

    If the transaction is neither incoming nor outgoing,
    C{self.neither_incoming_nor_outgoing()} is called.
    """
    with self.open_state() as state:
        chunks = state.chunks

    chunk_count = len(coalesce(chunks, []))
    logger.debug('Initializing CHUNKS transaction with %i chunks',
                 chunk_count)

    my_host = self.manager.app.host

    # The sender must have the chunks with the contents;
    # the receiver must have no chunks yet.
    assert ((self.message.src == my_host and
             consists_of(chunks, AbstractChunkWithContents)) or
            (self.message.dst == my_host and
             chunks is None)), repr(self)

    if self.is_outgoing():
        chunk_uuids = [ch.uuid for ch in chunks]
        logger.verbose('Going to send %i chunk(s): %r',
                       chunk_count, chunk_uuids)
        self.message.chunks = chunks
        self.manager.post_message(self.message)
        return

    if self.is_incoming():
        logger.debug('Going to receive the chunks')
        body_finished = self.message.body_receiver.on_finish
        # The first handler runs only on success,
        # the second one runs in either case.
        body_finished.addCallback(self._incoming_chunks_body_received)
        body_finished.addBoth(self._incoming_chunks_body_finish)
        return

    self.neither_incoming_nor_outgoing()
def get_log_directory(__debug_override=None):
    r"""Get the directory for the log files.

    In the debug mode it is the current directory; otherwise it is
    the subdirectory of C{/var/log} named after the project.

    >>> # Mostly smoke tests
    >>> get_log_directory(__debug_override=True)
    u'.'
    >>> get_log_directory(__debug_override=False)
    u'/var/log/freebrie'

    @param __debug_override: if passed, it is used instead of
        the C{__debug__} constant of the interpreter.

    @return: the directory where the logs should be stored.
    @rtype: basestring
    """
    if coalesce(__debug_override, __debug__):
        return u'.'

    return os.path.join(u'/var/log', version.project_name)
def __init__(self, states, transitions, initial_state,
             event_announce_callback=None, state_event_adapters=None,
             *args, **kwargs):
    """Constructor.

    @param states: supported states.
    @type: col.Iterable

    @param transitions: transitions possible in FSM.
    @type: col.Iterable

    @param initial_state: initial state of FSM; must be one
        of the supported states.
    @type: basestring

    @param event_announce_callback: callable, that would receive events
        from FSM (this can be event about state change or something else).
    @type: col.Callable

    @param state_event_adapters: state -> function; the function
        transforms state to event (used, when FSM changes state,
        as argument to event_announce_callback).
    @type: col.Mapping, NoneType
    """
    super(FSM, self).__init__(*args, **kwargs)

    supported_states = frozenset(states)
    self.states = supported_states
    assert initial_state in self.states, (initial_state, self.states)
    self.__current_state = initial_state

    # The storage and its lock must exist before any transition
    # is registered.
    self.__transitions_storage = {}
    self.__transitions_lock = RLock()
    for each_transition in transitions:
        self.__add_transition(each_transition)

    self.event_announce_callback = event_announce_callback

    adapters = coalesce(state_event_adapters, {})
    self.state_event_adapters = dict(adapters)
def __get_chunk_file_path(self, chunk_uuid, is_dummy, dir_path=None):
    """
    For a given uuid (of running host) and the chunk_uuid,
    return the appropriate path to store the chunk files.

    The file is placed two directory levels deep: the first level is named
    after the hex digit at index 7 of the chunk UUID, the second level
    after the hex digit at index 6.

    On release:
    '~/.freebrie/chunks/.../00000000-1111-2222-3333-123456789012.chunk'

    @type chunk_uuid: UUID

    @param is_dummy: whether the chunk is dummy
        (not used when the path is built).
    @type is_dummy: bool

    @param dir_path: Use this path to override the path for the directory
        to the chunks; if omitted, the real one is used.
    @type dir_path: basestring
    """
    hex_digits = chunk_uuid.hex
    first_level = hex_digits[7]
    second_level = hex_digits[6]

    chunks_root = coalesce(dir_path, self.__chunk_dir)
    file_name = '{}.chunk'.format(chunk_uuid)

    return os.path.join(chunks_root, first_level, second_level, file_name)
def _calculate_distribution(cls, chunks_map, hosts_free_space_map,
                            rng=None):
    r"""
    Calculate what number of (what kind) of chunks should be uploaded
    to what hosts.

    The chunks are processed one by one, largest first; for each chunk,
    a host is picked (using C{rng.choice}) among the hosts which still
    have enough free space for it.

    >>> from uuid import UUID
    >>> from common.logger import add_verbose_level; add_verbose_level()
    >>> rng = random.Random(42)

    >>> # Very simple
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={0: 17, 1: 13},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 1073741824,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 17, 1: 13}}

    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={0: 17, 1: 13},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 1073741824,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 1073741824,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 12, 1: 3},
     UUID('00000000-1111-0000-0000-000000000012'): {0: 5, 1: 10}}

    >>> # Make sure it doesn't corrupt the original mappings:
    >>> _chunks = {0: 5, 1: 7}
    >>> _free_space = {
    ...     UUID('00000000-1111-0000-0000-000000000011'): 12345670,
    ...     UUID('00000000-1111-0000-0000-000000000012'): 23456780,
    ...     UUID('00000000-1111-0000-0000-000000000013'): 34567890,
    ... }
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map=_chunks,
    ...     hosts_free_space_map=_free_space,
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 4, 1: 1},
     UUID('00000000-1111-0000-0000-000000000012'): {1: 2},
     UUID('00000000-1111-0000-0000-000000000013'): {0: 1, 1: 4}}
    >>> _chunks
    {0: 5, 1: 7}
    >>> _free_space  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): 12345670,
     UUID('00000000-1111-0000-0000-000000000012'): 23456780,
     UUID('00000000-1111-0000-0000-000000000013'): 34567890}

    >>> # Finally, make sure that it handles the cloud limits well.
    >>> # 1. This still fits...
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={2: 2, 3: 10},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 50331648,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 41943040,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {3: 6},
     UUID('00000000-1111-0000-0000-000000000012'): {2: 2, 3: 4}}
    >>> # 2. But this doesn't fit.
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={2: 2, 3: 10},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 50331648 - 1,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 41943040,
    ...     },
    ...     rng=rng
    ... )
    Traceback (most recent call last):
      ...
    NoSpaceError

    @param chunks_map: the map describing the chunks to be uploaded.
        It maps the chunks size code to the number of such chunks.
        Eg: {0: 17, 1: 13}
    @type chunks_map: col.Mapping

    @param hosts_free_space_map: the mapping from the host UUID
        to the space available on it (in bytes, approximately).
        Eg: {UUID('00000000-1111-0000-0000-000000000011'): 1073741824}
    @type hosts_free_space_map: col.Mapping

    @param rng: a rng to use for distribution.
    @type rng: random.Random

    @return: the mapping from the host UUID (which will accept chunks)
        to the mapping of chunks to upload to it.
        The mapping of uploaded chunks looks like this:
        the chunk size code is mapped to the number of such chunks,
        eg: C{ {0: 8, 1: 5} }.
    @rtype: col.Mapping

    @raises NoSpaceError: if no solution is found to upload the chunks.
    """
    rng = coalesce(rng, random)

    # Make a copy, to never corrupt the original.
    _chunks_map = dict(chunks_map)
    _hosts_free_space_map = dict(hosts_free_space_map)

    logger.debug('Need to allocate %r at %r',
                 _chunks_map, _hosts_free_space_map)

    M = 1024 * 1024
    result = {}

    # Let's loop over the desired chunks (actually, its sizes)
    # by the size descending, and allocate each chunk to some host.
    # Every size code is repeated as many times as there are such chunks.
    _size_codes = chain.from_iterable(
                      [sz] * _chunks_map[sz]
                      for sz in sorted(_chunks_map.iterkeys(),
                                       reverse=True))

    for chunk_size_code in _size_codes:
        # The size code is the power of two of the chunk size in MiB.
        this_chunk_size_mib = 2 ** chunk_size_code
        this_chunk_size = this_chunk_size_mib * M

        # Which hosts still have enough space?
        candidate_host_uuids = \
            [u
                 for u, sz in _hosts_free_space_map.iteritems()
                 if this_chunk_size <= sz]

        # The size is logged in MiB, to agree with the message text
        # (the free space in the mapping is in bytes).
        logger.verbose('To allocate %r MiB, we have these hosts: %r',
                       this_chunk_size_mib, candidate_host_uuids)

        if not candidate_host_uuids:
            # Oops, we still have chunks unallocated,
            # but no more host can receive it.
            raise NoSpaceError()

        use_host_uuid = rng.choice(candidate_host_uuids)

        # Mark some space on this host as occupied.
        _hosts_free_space_map[use_host_uuid] -= this_chunk_size

        # Take chunk counts for this host...
        per_host_chunks_counts = result.setdefault(use_host_uuid, {})
        # ... and increase the chunk count for the currently
        # processed chunk.
        per_host_chunks_counts[chunk_size_code] = \
            per_host_chunks_counts.get(chunk_size_code, 0) + 1

    return result
def query_data_replication_stats(self, ds_uuid, path, recursive=False,
                                 on_received=None):
    """
    Request the replication statistics for the particular (or any) files
    in the particular (or any) dataset for this host.

    A C{NEED_INFO_FROM_NODE} transaction from this host to the primary
    node is created; the result is handled when the transaction completes.

    @param ds_uuid: the UUID of the dataset in the cloud
        (or C{None} if any dataset is ok).
    @type ds_uuid: UUID, NoneType

    @param path: the path which needs the statistics
        (or C{None} if any path in the dataset is ok).
    @type path: basestring, NoneType

    @param recursive: whether the path should be treated recursive.
    @type recursive: bool

    @param on_received: the handler which is called when the data
        is received; it is called with the dataset UUID, the path
        and the received result.
        If C{None}, nothing is called, the data is just output
        via the logger.
    @type on_received: col.Callable, NoneType
    """
    # C{None} for the dataset or for the path is sent as the '*' wildcard.
    _query = {
        'select': ('file_count', 'file_size',
                   'uniq_file_count', 'uniq_file_size',
                   'full_replicas_count',
                   'chunk_count', 'chunk_replicas_count',
                   'hosts_count'),
        'from': 'data_stats',
        'where': {
            'dataset': ds_uuid.hex if ds_uuid is not None else '*',
            'path': coalesce(path, '*'),
            'rec': recursive,
        },
    }

    nifn_tr = self.tr_manager.create_new_transaction(
                  name='NEED_INFO_FROM_NODE',
                  src=self.host,
                  dst=self.primary_node,
                  # NEED_INFO_FROM_NODE-specific
                  query=_query)

    @exceptions_logged(logger)
    def error_handler(failure):
        # The format string must have one placeholder per argument
        # (four of them); otherwise the logging module fails to format
        # the record and the error report is lost.
        logger_status.error(
            'The statistics for the %r/%r could not '
                'be queried from the cloud: %r %s',
            ds_uuid, path, failure, failure.getErrorMessage(),
            extra={'_type': 'error_on_querying_statistics_from_dataset'})

    @exceptions_logged(logger)
    @contract_epydoc
    def success_handler(ni_state):
        """
        @type ni_state: transactions.NeedInfoFromNodeTransaction_Host.State
        """
        if on_received is not None:
            on_received(ds_uuid, path, ni_state.ack_result)

        logger_status.info('Data statistics %r',
                           ni_state.ack_result,
                           extra={'_type': 'data_stats',
                                  'ds_uuid': ds_uuid,
                                  'path': path,
                                  'value': ni_state.ack_result})

    # Do not use addCallbacks() here: the errback is deliberately added
    # after the callback, as a separate step of the chain.
    nifn_tr.completed.addCallback(success_handler)
    nifn_tr.completed.addErrback(error_handler)
def __backup_some_phys_files(self, base_dir, files, ugroup,
                             __do_start_backup=True):
    r"""Given some files, create a new dataset and start to backup them.

    >>> # ugroup = UserGroup(
    >>> #     uuid=UserGroupUUID('00000000-bbbb-0000-0000-000000000001'),
    >>> #     name='AlphA',
    >>> #     private=True,
    >>> #     enc_key='\x01\xe6\x13\xdab)\xd2n\xd6\xafTH\x03h\x02\x12'
    >>> #             '\x17D\x1a\xeb\x8b6\xc0\x9b\xa6\x7f\xcc\x06N\xcf'
    >>> #             '\x8b\xcd'
    >>> # )

    >>> # __backup_some_phys_files(
    >>> #     base_dir=u'/home/john/FreeBrie',
    >>> #     files=[
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f1.mp3',
    >>> #             size=13829879,
    >>> #             time_changed=datetime(2012, 11, 5, 12,12,41,904430)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f2.avi',
    >>> #             size=3522710,
    >>> #             time_changed=datetime(2012, 11, 5, 12,12,41,988433)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b',
    >>> #             rel_file=u'bbb',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15,33,42,19808)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b/c',
    >>> #             rel_file=u'ccc',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15,33,41,979807))
    >>> #     ],
    >>> #     ugroup=ugroup)

    @todo: complete the unit test, which is half-done!

    @param base_dir: the directory being backed up.
    @type base_dir: basestring

    @param files: the iterable over the files which should be backed up.
        Contains C{LocalPhysicalFileStateRel} objects.
        The caller should ensure that C{files} is non-empty!
    @type files: col.Iterable

    @type ugroup: UserGroup

    @param __do_start_backup: whether the backup should be started
        for the dataset, if it was created.

    @return: the created dataset (if succeeded).
    @rtype: DatasetOnPhysicalFiles, NoneType
    """
    logger.debug('__backup_some_phys_files(%r, %r)',
                 base_dir, ugroup)

    def _as_virtual_file(f):
        """Convert a single file state to a C{RelVirtualFile}."""
        def _stat_getter(f=f):
            return os_ex.safe_stat(os.path.join(base_dir, f.rel_path))

        def _file_getter(f=f):
            return open(os.path.join(base_dir, f.rel_path), 'rb')

        # If we can read real stat, read it;
        # otherwise (deleted file) we'll emulate it with fake_stat.
        real_stat = os_ex.safe_stat(os.path.join(base_dir, f.rel_path))
        emulated_stat = os_ex.fake_stat(st_mode=None,
                                        atime=f.time_changed,
                                        mtime=f.time_changed,
                                        ctime=f.time_changed,
                                        size=None)

        return RelVirtualFile(rel_dir=f.rel_dir,
                              filename=f.rel_file,
                              stat=coalesce(real_stat, emulated_stat),
                              stat_getter=_stat_getter,
                              file_getter=_file_getter)

    # Group files by rel_dir; then ignore base_dir,
    # keep only rel_dir, rel_file, size and time_changed.
    # Both levels are lazy: this is an iterable (one item per rel_dir)
    # over the iterables of RelVirtualFile objects.
    files_grouped_by_rel_dir = \
        ((_as_virtual_file(f) for f in per_rel_dir)
             for rel_dir, per_rel_dir
                 in sorted_groupby(files, attrgetter('rel_dir')))

    _path_map = {
        base_dir: {
            'ifiles': files_grouped_by_rel_dir,
            'stat': os_ex.safe_stat(base_dir),
        },
    }

    ds_uuid = DatasetUUID.safe_cast_uuid(gen_uuid())
    ds = self.select_paths_for_backup(ds_name='',
                                      ds_uuid=ds_uuid,
                                      ugroup_uuid=ugroup.uuid,
                                      sync=True,
                                      paths_map=_path_map)

    # Start the backup only if the dataset was really created.
    if ds is not None and __do_start_backup:
        self.start_backup(ds_uuid)

    return ds
def _calculate_distribution(cls, chunks_map, hosts_free_space_map,
                            rng=None):
    r"""
    Calculate what number of (what kind) of chunks should be uploaded
    to what hosts.

    The chunks are processed one by one, largest first; for each chunk,
    a host is picked (using C{rng.choice}) among the hosts which still
    have enough free space for it.

    >>> from uuid import UUID
    >>> from common.logger import add_verbose_level; add_verbose_level()
    >>> rng = random.Random(42)

    >>> # Very simple
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={0: 17, 1: 13},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 1073741824,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 17, 1: 13}}

    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={0: 17, 1: 13},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 1073741824,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 1073741824,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 12, 1: 3},
     UUID('00000000-1111-0000-0000-000000000012'): {0: 5, 1: 10}}

    >>> # Make sure it doesn't corrupt the original mappings:
    >>> _chunks = {0: 5, 1: 7}
    >>> _free_space = {
    ...     UUID('00000000-1111-0000-0000-000000000011'): 12345670,
    ...     UUID('00000000-1111-0000-0000-000000000012'): 23456780,
    ...     UUID('00000000-1111-0000-0000-000000000013'): 34567890,
    ... }
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map=_chunks,
    ...     hosts_free_space_map=_free_space,
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {0: 4, 1: 1},
     UUID('00000000-1111-0000-0000-000000000012'): {1: 2},
     UUID('00000000-1111-0000-0000-000000000013'): {0: 1, 1: 4}}
    >>> _chunks
    {0: 5, 1: 7}
    >>> _free_space  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): 12345670,
     UUID('00000000-1111-0000-0000-000000000012'): 23456780,
     UUID('00000000-1111-0000-0000-000000000013'): 34567890}

    >>> # Finally, make sure that it handles the cloud limits well.
    >>> # 1. This still fits...
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={2: 2, 3: 10},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 50331648,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 41943040,
    ...     },
    ...     rng=rng
    ... )  # doctest:+NORMALIZE_WHITESPACE
    {UUID('00000000-1111-0000-0000-000000000011'): {3: 6},
     UUID('00000000-1111-0000-0000-000000000012'): {2: 2, 3: 4}}
    >>> # 2. But this doesn't fit.
    >>> ProvideBackupHostsTransaction_Node._calculate_distribution(
    ...     chunks_map={2: 2, 3: 10},
    ...     hosts_free_space_map={
    ...         UUID('00000000-1111-0000-0000-000000000011'): 50331648 - 1,
    ...         UUID('00000000-1111-0000-0000-000000000012'): 41943040,
    ...     },
    ...     rng=rng
    ... )
    Traceback (most recent call last):
      ...
    NoSpaceError

    @param chunks_map: the map describing the chunks to be uploaded.
        It maps the chunks size code to the number of such chunks.
        Eg: {0: 17, 1: 13}
    @type chunks_map: col.Mapping

    @param hosts_free_space_map: the mapping from the host UUID
        to the space available on it (in bytes, approximately).
        Eg: {UUID('00000000-1111-0000-0000-000000000011'): 1073741824}
    @type hosts_free_space_map: col.Mapping

    @param rng: a rng to use for distribution.
    @type rng: random.Random

    @return: the mapping from the host UUID (which will accept chunks)
        to the mapping of chunks to upload to it.
        The mapping of uploaded chunks looks like this:
        the chunk size code is mapped to the number of such chunks,
        eg: C{ {0: 8, 1: 5} }.
    @rtype: col.Mapping

    @raises NoSpaceError: if no solution is found to upload the chunks.
    """
    rng = coalesce(rng, random)

    # Make a copy, to never corrupt the original.
    _chunks_map = dict(chunks_map)
    _hosts_free_space_map = dict(hosts_free_space_map)

    logger.debug('Need to allocate %r at %r',
                 _chunks_map, _hosts_free_space_map)

    M = 1024 * 1024
    result = {}

    # Let's loop over the desired chunks (actually, its sizes)
    # by the size descending, and allocate each chunk to some host.
    # Every size code is repeated as many times as there are such chunks.
    _size_codes = chain.from_iterable(
                      [sz] * _chunks_map[sz]
                      for sz in sorted(_chunks_map.iterkeys(),
                                       reverse=True))

    for chunk_size_code in _size_codes:
        # The size code is the power of two of the chunk size in MiB.
        this_chunk_size_mib = 2 ** chunk_size_code
        this_chunk_size = this_chunk_size_mib * M

        # Which hosts still have enough space?
        candidate_host_uuids = \
            [u
                 for u, sz in _hosts_free_space_map.iteritems()
                 if this_chunk_size <= sz]

        # The size is logged in MiB, to agree with the message text
        # (the free space in the mapping is in bytes).
        logger.verbose('To allocate %r MiB, we have these hosts: %r',
                       this_chunk_size_mib, candidate_host_uuids)

        if not candidate_host_uuids:
            # Oops, we still have chunks unallocated,
            # but no more host can receive it.
            raise NoSpaceError()

        use_host_uuid = rng.choice(candidate_host_uuids)

        # Mark some space on this host as occupied.
        _hosts_free_space_map[use_host_uuid] -= this_chunk_size

        # Take chunk counts for this host...
        per_host_chunks_counts = result.setdefault(use_host_uuid, {})
        # ... and increase the chunk count for the currently
        # processed chunk.
        per_host_chunks_counts[chunk_size_code] = \
            per_host_chunks_counts.get(chunk_size_code, 0) + 1

    return result
def __restore_files(self):
    """Internal procedure which actually restores the files.

    The files from C{self.message.files} are restored one by one into
    the restore directory. If the whole dataset is being synced-in
    (C{self.message.sync} is true), the dataset is also written to the DB
    before the files are restored, and updated with the completion time
    afterwards.

    If the restore directory is not known, only an error is logged.

    @todo: the Fingerprint calculation should be turned into
        "file is read by blocks and then repacked into 16KiB segments";
        then recalculation of the fingerprint in case of FP mismatch
        won't be needed.
    """
    _message = self.message
    my_host = self.manager.app.host
    feature_set = self.manager.app.feature_set
    ds = _message.dataset
    wr_uuid = _message.wr_uuid
    ugroup = _message.ugroup

    restore_directory = self.__get_restore_directory()
    # Syncing-in, having no WR UUID and having the dataset UUID
    # must be all true or all false together.
    assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
           (_message.sync, wr_uuid, ds)

    base_dir_id = None  # will be used later

    if restore_directory is None:
        logger.error('Do not know the restore directory')
    else:
        logger.debug('Going to restore dataset %r for %r to %r',
                     ds, ugroup, restore_directory)
        if not os.path.exists(restore_directory):
            os.makedirs(restore_directory)

        # The group key is used only if the per-group encryption
        # feature is enabled.
        group_key = ugroup.enc_key if feature_set.per_group_encryption \
                                   else None
        cryptographer = Cryptographer(group_key=group_key,
                                      key_generator=None)

        is_whole_dataset_restored = _message.sync

        logger.debug('Restoring %s files from dataset: %r',
                     'all' if is_whole_dataset_restored else 'selected',
                     coalesce(ds, 'N/A'))

        # TODO: use the "delete time" from the LocalPhysicalFileState!
        _now = datetime.utcnow()

        # If we are syncing-in the whole dataset, we should write it
        # into the DB as a whole. The files/file_locals will be bound to it
        # so that after restore, we'll know on this Host that these states
        # are fully synced to the cloud already (in fact, they came
        # from the cloud).
        if _message.sync:
            # Let's hack into the files and substitute the base_dir.
            # TODO: do it better!
            for f in _message.files.iterkeys():
                f.base_dir = restore_directory

            # Write the whole dataset to the DB
            _small_files = _message.files.keys()  # not iterkeys() for now!
            _dirs = {restore_directory: (_small_files, [])}

            # Given the information in the inbound message about
            # the whole dataset, store this dataset in the DB.
            dataset = DatasetWithDirectories(
                          name=ds.name,
                          sync=ds.sync,
                          directories=_dirs,
                          # TODO: transport real data
                          # from the node
                          uuid=DatasetUUID.safe_cast_uuid(ds.uuid),
                          ugroup_uuid=UserGroupUUID.safe_cast_uuid(
                                          ugroup.uuid),
                          time_started=ds.time_started,
                          time_completed=_now)

            with db.RDB() as rdbw:
                # Do we already have the dataset?
                _ds_in_progress = \
                    HostQueries.HostDatasets.get_my_ds_in_progress(
                        host_uuid=my_host.uuid,
                        ds_uuid=dataset.uuid,
                        rdbw=rdbw)

                if _ds_in_progress is None:
                    # We don't have it, insert.
                    dummy_ds_uuid = \
                        HostQueries.HostDatasets.create_dataset_for_backup(
                            my_host.uuid, dataset, rdbw)
                    assert dummy_ds_uuid == dataset.uuid, \
                           (dummy_ds_uuid, dataset.uuid)

                # NOTE(review): presumably the base directory is needed
                # no matter whether the dataset was just inserted;
                # confirm the nesting level of this statement.
                base_dir_id = \
                    HostQueries.HostFiles.add_or_get_base_directory(
                        restore_directory, ugroup.uuid, rdbw)

        # NOTE(review): this flag is never changed below in this method,
        # so the "some issues" branches are never taken; confirm whether
        # __restore_op_for_path was meant to report the errors.
        error_in_any_file_occured = False

        #
        # Finally, loop over the files and restore each one
        #
        for file_, file_blocks in _message.files.iteritems():
            self.__restore_op_for_path(file_, file_blocks,
                                       is_whole_dataset_restored,
                                       base_dir_id,
                                       restore_directory,
                                       cryptographer,
                                       ds)

        # Loop over the files completed
        if is_whole_dataset_restored:
            logger.debug('Restoring %r completed, there were %s issues.',
                         ds,
                         'some' if error_in_any_file_occured else 'no')
            if not error_in_any_file_occured:
                with db.RDB() as rdbw:
                    logger.debug('Updating %r at host %s...',
                                 ds, my_host.uuid)
                    ds_to_finish = \
                        Queries.Datasets.get_dataset_by_uuid(ds.uuid,
                                                             my_host.uuid,
                                                             rdbw)

                    ds_to_finish.time_completed = datetime.utcnow()
                    # "dataset" is bound above, in the "_message.sync"
                    # branch; is_whole_dataset_restored equals
                    # _message.sync, so it is available here.
                    logger.debug('Updating %r as completed', dataset)

                    # Mark the current dataset as completed
                    # only after the response from the node is received.
                    Queries.Datasets.update_dataset(my_host.uuid,
                                                    ds_to_finish,
                                                    rdbw)

        # Everything seems ok to this moment
        # NOTE(review): presumably the OK code is set only when
        # the restore directory is known; confirm the nesting level.
        with self.open_state(for_update=True) as state:
            state.ack_result_code = RestoreMessage.ResultCodes.OK
def __backup_some_phys_files(self, base_dir, files, ugroup,
                             __do_start_backup=True):
    r"""Given some files, create a new dataset and start to backup them.

    >>> # ugroup = UserGroup(
    >>> #     uuid=UserGroupUUID('00000000-bbbb-0000-0000-000000000001'),
    >>> #     name='AlphA',
    >>> #     private=True,
    >>> #     enc_key='\x01\xe6\x13\xdab)\xd2n\xd6\xafTH\x03h\x02\x12'
    >>> #             '\x17D\x1a\xeb\x8b6\xc0\x9b\xa6\x7f\xcc\x06N\xcf'
    >>> #             '\x8b\xcd'
    >>> # )

    >>> # __backup_some_phys_files(
    >>> #     base_dir=u'/home/john/FreeBrie',
    >>> #     files=[
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f1.mp3',
    >>> #             size=13829879,
    >>> #             time_changed=datetime(2012, 11, 5, 12,12,41,904430)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir='',
    >>> #             rel_file=u'f2.avi',
    >>> #             size=3522710,
    >>> #             time_changed=datetime(2012, 11, 5, 12,12,41,988433)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b',
    >>> #             rel_file=u'bbb',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15,33,42,19808)),
    >>> #         LocalPhysicalFileStateRel(
    >>> #             rel_dir=u'a/b/c',
    >>> #             rel_file=u'ccc',
    >>> #             size=4,
    >>> #             time_changed=datetime(2012, 10, 11, 15,33,41,979807))
    >>> #     ],
    >>> #     ugroup=ugroup)

    @todo: complete the unit test, which is half-done!

    @param base_dir: the directory being backed up.
    @type base_dir: basestring

    @param files: the iterable over the files which should be backed up.
        Contains C{LocalPhysicalFileStateRel} objects.
        The caller should ensure that C{files} is non-empty!
    @type files: col.Iterable

    @type ugroup: UserGroup

    @param __do_start_backup: whether the backup should be started
        for the dataset, if it was created.

    @return: the created dataset (if succeeded).
    @rtype: DatasetOnPhysicalFiles, NoneType
    """
    logger.debug('__backup_some_phys_files(%r, %r)',
                 base_dir, ugroup)

    def _as_virtual_file(f):
        """Convert a single file state to a C{RelVirtualFile}."""
        def _stat_getter(f=f):
            return os_ex.safe_stat(os.path.join(base_dir, f.rel_path))

        def _file_getter(f=f):
            return open(os.path.join(base_dir, f.rel_path), 'rb')

        # If we can read real stat, read it;
        # otherwise (deleted file) we'll emulate it with fake_stat.
        real_stat = os_ex.safe_stat(os.path.join(base_dir, f.rel_path))
        emulated_stat = os_ex.fake_stat(st_mode=None,
                                        atime=f.time_changed,
                                        mtime=f.time_changed,
                                        ctime=f.time_changed,
                                        size=None)

        return RelVirtualFile(rel_dir=f.rel_dir,
                              filename=f.rel_file,
                              stat=coalesce(real_stat, emulated_stat),
                              stat_getter=_stat_getter,
                              file_getter=_file_getter)

    # Group files by rel_dir; then ignore base_dir,
    # keep only rel_dir, rel_file, size and time_changed.
    # Both levels are lazy: this is an iterable (one item per rel_dir)
    # over the iterables of RelVirtualFile objects.
    files_grouped_by_rel_dir = \
        ((_as_virtual_file(f) for f in per_rel_dir)
             for rel_dir, per_rel_dir
                 in sorted_groupby(files, attrgetter('rel_dir')))

    _path_map = {
        base_dir: {
            'ifiles': files_grouped_by_rel_dir,
            'stat': os_ex.safe_stat(base_dir),
        },
    }

    ds_uuid = DatasetUUID.safe_cast_uuid(gen_uuid())
    ds = self.select_paths_for_backup(ds_name='',
                                      ds_uuid=ds_uuid,
                                      ugroup_uuid=ugroup.uuid,
                                      sync=True,
                                      paths_map=_path_map)

    # Start the backup only if the dataset was really created.
    if ds is not None and __do_start_backup:
        self.start_backup(ds_uuid)

    return ds
def query_data_replication_stats(self, ds_uuid, path, recursive=False,
                                 on_received=None):
    """
    Request the replication statistics for the particular (or any) files
    in the particular (or any) dataset for this host.

    A C{NEED_INFO_FROM_NODE} transaction from this host to the primary
    node is created; the result is handled when the transaction completes.

    @param ds_uuid: the UUID of the dataset in the cloud
        (or C{None} if any dataset is ok).
    @type ds_uuid: UUID, NoneType

    @param path: the path which needs the statistics
        (or C{None} if any path in the dataset is ok).
    @type path: basestring, NoneType

    @param recursive: whether the path should be treated recursive.
    @type recursive: bool

    @param on_received: the handler which is called when the data
        is received; it is called with the dataset UUID, the path
        and the received result.
        If C{None}, nothing is called, the data is just output
        via the logger.
    @type on_received: col.Callable, NoneType
    """
    # C{None} for the dataset or for the path is sent as the '*' wildcard.
    _query = {
        'select': ('file_count', 'file_size',
                   'uniq_file_count', 'uniq_file_size',
                   'full_replicas_count',
                   'chunk_count', 'chunk_replicas_count',
                   'hosts_count'),
        'from': 'data_stats',
        'where': {
            'dataset': ds_uuid.hex if ds_uuid is not None else '*',
            'path': coalesce(path, '*'),
            'rec': recursive,
        },
    }

    nifn_tr = self.tr_manager.create_new_transaction(
                  name='NEED_INFO_FROM_NODE',
                  src=self.host,
                  dst=self.primary_node,
                  # NEED_INFO_FROM_NODE-specific
                  query=_query)

    @exceptions_logged(logger)
    def error_handler(failure):
        # The format string must have one placeholder per argument
        # (four of them); otherwise the logging module fails to format
        # the record and the error report is lost.
        logger_status.error(
            'The statistics for the %r/%r could not '
                'be queried from the cloud: %r %s',
            ds_uuid, path, failure, failure.getErrorMessage(),
            extra={'_type': 'error_on_querying_statistics_from_dataset'})

    @exceptions_logged(logger)
    @contract_epydoc
    def success_handler(ni_state):
        """
        @type ni_state: transactions.NeedInfoFromNodeTransaction_Host.State
        """
        if on_received is not None:
            on_received(ds_uuid, path, ni_state.ack_result)

        logger_status.info('Data statistics %r',
                           ni_state.ack_result,
                           extra={'_type': 'data_stats',
                                  'ds_uuid': ds_uuid,
                                  'path': path,
                                  'value': ni_state.ack_result})

    # Do not use addCallbacks() here: the errback is deliberately added
    # after the callback, as a separate step of the chain.
    nifn_tr.completed.addCallback(success_handler)
    nifn_tr.completed.addErrback(error_handler)
def __init__(self, code, username_uc, ds_uuid, basedirectory_path,
             rel_path,
             active=True, blocked=False, viewsN=0, downloadsN=0,
             created_ts=None, last_view_ts=None, last_download_ts=None,
             mime=None,
             *args, **kwargs):
    r"""Constructor.

    >>> # No optional arguments
    >>> MagnetLink(code='jDd6r',
    ...            username_uc='*****@*****.**',
    ...            ds_uuid=UUID('766796e9-6f08-456d-806b-a24bc01f180c'),
    ...            basedirectory_path='C:/Windows/System32/drivers',
    ...            rel_path='etc/hosts',
    ... ) # doctest:+NORMALIZE_WHITESPACE +ELLIPSIS
    MagnetLink(_id=None,
        username_uc="*****@*****.**",
        ds_uuid=UUID('766796e9-6f08-456d-806b-a24bc01f180c'),
        basedirectory_path="C:/Windows/System32/drivers",
        rel_path="etc/hosts",
        active=True,
        viewsN=0,
        downloadsN=0,
        created_ts=datetime.datetime(...))

    >>> # All optional arguments
    >>> MagnetLink(
    ...     code='KzFp3',
    ...     username_uc='*****@*****.**',
    ...     ds_uuid=UUID('ac349f0e-3c16-7832-acfe-c4357ac18c57'),
    ...     basedirectory_path='/home/username/my_folder',
    ...     rel_path='photos/avatar.png',
    ...     active=False,
    ...     viewsN=3,
    ...     downloadsN=1,
    ...     created_ts=datetime(2012, 1, 1, 1, 1, 1, 111111),
    ...     blocked=True,
    ...     last_view_ts=datetime(2012, 2, 2, 2, 2, 2, 222222),
    ...     last_download_ts=datetime(2012, 3, 3, 3, 3, 3, 333333),
    ...     mime="test/mimetype"
    ... ) # doctest:+NORMALIZE_WHITESPACE
    MagnetLink(_id=None,
        username_uc="*****@*****.**",
        ds_uuid=UUID('ac349f0e-3c16-7832-acfe-c4357ac18c57'),
        basedirectory_path="/home/username/my_folder",
        rel_path="photos/avatar.png",
        active=False,
        viewsN=3,
        downloadsN=1,
        created_ts=datetime.datetime(2012, 1, 1, 1, 1, 1, 111111),
        blocked=True,
        last_view_ts=datetime.datetime(2012, 2, 2, 2, 2, 2, 222222),
        last_download_ts=datetime.datetime(2012, 3, 3, 3, 3, 3, 333333),
        mime="test/mimetype")

    Any extra positional/keyword arguments are forwarded
    to the parent constructor.

    @param code: the code of the magnet link.
    @type code: basestring

    @param username_uc: the name of the user.
    @type username_uc: basestring

    @param ds_uuid: the UUID of the Host.
        NOTE(review): the argument name suggests a dataset UUID
        rather than a host UUID - confirm.
    @type ds_uuid: UUID

    @param basedirectory_path: the path of the base directory;
        it is used to find the unique file.
    @type basedirectory_path: basestring

    @param rel_path: the relative path of the file.
    @type rel_path: basestring

    @param active: whether the magnet link is active.
    @type active: bool

    @param viewsN: the number of views of this magnet link.
    @type viewsN: numbers.Integral

    @param downloadsN: the number of downloads of the file
        via this magnet link.
    @type downloadsN: numbers.Integral

    @param created_ts: the creation time; if C{None},
        C{datetime.utcnow()} is used.
    @type created_ts: datetime, NoneType

    @param blocked: whether the magnet link is blocked
        by the administrator.
    @type blocked: bool

    @type last_view_ts: datetime, NoneType
    @type last_download_ts: datetime, NoneType

    @param mime: the mimetype of the file; if C{None}, the first item
        of C{guess_type(rel_path)} is used.
    @type mime: basestring, NoneType
    """
    super(MagnetLink, self).__init__(*args, **kwargs)

    # What the link is and which file it points at.
    self.code = code
    self.username_uc = username_uc
    self.ds_uuid = ds_uuid
    self.basedirectory_path = basedirectory_path
    self.rel_path = rel_path

    # State and counters; the caller may override the defaults.
    self.active = active
    self.viewsN = viewsN
    self.downloadsN = downloadsN
    # The current time is computed even if created_ts is given.
    current_ts = datetime.utcnow()
    self.created_ts = coalesce(created_ts, current_ts)

    # Optional fields
    self.blocked = blocked
    self.last_view_ts = last_view_ts
    self.last_download_ts = last_download_ts
    # The guess is made even if an explicit mime is given.
    guessed_mime = guess_type(rel_path)[0]
    self.mime = coalesce(mime, guessed_mime)
def __init__(self, ds_uuid,
             file_paths_for_basedirs=None, wr_uuid=None,
             success=False, num=0, of=0, num_bytes=0, of_bytes=0,
             last_progress_recalc_time=None,
             pending_chunk_uuids=None, pending_host_uuids=None,
             no_donors_retries=0,
             ack_result_code=None,
             *args, **kwargs):
    r"""Constructor.

    >>> # No optional arguments.
    >>> RestoreTransactionState_Node(
    ...     ds_uuid=UUID('c126ea26-d7e7-4394-a75a-c6f3ef6bc32a'),
    ...     tr_start_time=datetime(2012, 9, 26, 14, 29, 48, 877434),
    ...     tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
    ...     tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
    ...     tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d')
    ... ) # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE
    Restore...State_Node(tr_start_time=datetime.datetime(2012, 9, 26, 14,
                                                         29, 48, 877434),
        tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
        tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
        tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d'),
        ds_uuid=UUID('c126ea26-d7e7-4394-a75a-c6f3ef6bc32a'))

    >>> # All optional arguments.
    >>> RestoreTransactionState_Node(
    ...     ds_uuid=UUID('c126ea26-d7e7-4394-a75a-c6f3ef6bc32a'),
    ...     file_paths_for_basedirs={
    ...         '/home/johndoe': ['.zrc', 'bin/f1', 'music/Abba/01.mp3'],
    ...         '/etc': ['passwd', 'init.d/networking'],
    ...     },
    ...     wr_uuid=UUID('998d45e6-a5ea-4ffd-a595-982edfa8e3e9'),
    ...     success=False,
    ...     num=15, of=17,
    ...     num_bytes=47, of_bytes=123,
    ...     last_progress_recalc_time=datetime(2012, 11, 5, 16, 7, 37,
    ...                                        88896),
    ...     pending_chunk_uuids=
    ...         [UUID('cb8fb4d2-563d-413f-80f9-9a55c65ea15c'),
    ...          UUID('a5dc34b3-b7d1-49fc-8b2f-178eb4ea12fd'),
    ...          UUID('6d78a6e6-e223-4233-9d37-3c16d625b00d')],
    ...     pending_host_uuids=
    ...         [UUID('59ad73e3-0cad-4402-8811-65ac4736bdc3'),
    ...          UUID('6812914c-0c3f-411d-9770-25c56473fc5f')],
    ...     no_donors_retries=3,
    ...     ack_result_code=42,
    ...     tr_start_time=datetime(2012, 9, 26, 14, 29, 48, 877434),
    ...     tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
    ...     tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
    ...     tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d'),
    ... ) # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE
    Restore...State_Node(tr_start_time=datetime.datetime(2012, 9, 26, 14,
                                                         29, 48, 877434),
        tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
        tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
        tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d'),
        ds_uuid=UUID('c126ea26-d7e7-4394-a75a-c6f3ef6bc32a'),
        file_paths_for_basedirs={'/home/johndoe': ['.zrc', 'bin/f1',
                                                   'music/Abba/01.mp3'],
                                 '/etc': ['passwd', 'init.d/networking']},
        wr_uuid=UUID('998d45e6-a5ea-4ffd-a595-982edfa8e3e9'),
        num=15, of=17,
        num_bytes=47, of_bytes=123,
        last_progress_recalc_time=datetime.datetime(2012, 11, 5, 16, 7,
                                                    37, 88896),
        pending_chunk_uuids=[UUID('6d78a6e6-e223-4233-9d37-3c16d625b00d'),
                             UUID('a5dc34b3-b7d1-49fc-8b2f-178eb4ea12fd'),
                             UUID('cb8fb4d2-563d-413f-80f9-9a55c65ea15c')],
        pending_host_uuids=[UUID('59ad73e3-0cad-4402-8811-65ac4736bdc3'),
                            UUID('6812914c-0c3f-411d-9770-25c56473fc5f')],
        no_donors_retries=3,
        ack_result_code=42)

    The C{tr_start_time} argument of the parent constructor must be
    passed by keyword: it is read from C{kwargs} to provide the default
    for C{last_progress_recalc_time}, and that lookup happens even
    when C{last_progress_recalc_time} is given explicitly.

    @param ds_uuid: dataset UUID.
    @type ds_uuid: UUID

    @param file_paths_for_basedirs: either a mapping from the base
        directories to the (relative) file paths to restore
        (or even maybe to C{None}, if all files from a base directory
        are needed), or C{None} if all files from the dataset
        are needed.
    @type file_paths_for_basedirs: col.Mapping, NoneType

    @type wr_uuid: UUID, NoneType

    @param pending_chunk_uuids: stored as a C{set}
        (empty if C{None} is passed).
    @type pending_chunk_uuids: col.Iterable, NoneType

    @param pending_host_uuids: stored as passed
        (a new empty list if C{None} is passed).
    @type pending_host_uuids: col.Iterable, NoneType

    @param no_donors_retries: how many retries on "no donors found" case
        have already occurred.
    @type no_donors_retries: numbers.Integral
    """
    super(RestoreTransactionState_Node, self).__init__(*args, **kwargs)

    # What is being restored.
    self.ds_uuid = ds_uuid
    self.file_paths_for_basedirs = file_paths_for_basedirs
    self.wr_uuid = wr_uuid

    # Progress counters.
    self.num = num
    self.of = of
    self.num_bytes = num_bytes
    self.of_bytes = of_bytes

    # Raises KeyError unless tr_start_time was passed by keyword.
    tr_start_time = kwargs['tr_start_time']
    self.last_progress_recalc_time = coalesce(last_progress_recalc_time,
                                              tr_start_time)

    self.success = success

    chunk_uuids = coalesce(pending_chunk_uuids, [])
    self.pending_chunk_uuids = set(chunk_uuids)
    self.pending_host_uuids = coalesce(pending_host_uuids, [])

    self.no_donors_retries = no_donors_retries

    # Results
    self.ack_result_code = ack_result_code

    assert self.is_valid_bsonable(), repr(self)
def __init__(self,
             chunks_to_replicate=None, chunks_to_restore=None,
             *args, **kwargs):
    r"""Constructor.

    Either of the two fields, C{chunks_to_replicate}
    or C{chunks_to_restore}, is likely created.

    >>> from datetime import datetime
    >>> from uuid import UUID
    >>> from common.chunks import ChunkInfo
    >>> from common.typed_uuids import ChunkUUID

    >>> tr_start_time = datetime(2012, 9, 26, 14, 29, 48, 877434)
    >>> tr_uuid = UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2')
    >>> tr_src_uuid = UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24')
    >>> tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d')

    >>> u1, u2, u3, u4 = \
    ...     (ChunkUUID('5b237ceb-300d-4c88-b4c0-6331cb14b5b4'),
    ...      ChunkUUID('940f0711-52d7-42fb-bf4c-818580f432dc'),
    ...      ChunkUUID('a5b605f2-6ea5-49f3-8658-d217b7e8e784'),
    ...      ChunkUUID('0a7064b3-bef6-45c0-9e82-e9f9a40dfcf3'))
    >>> host_uuid_1, host_uuid_2 = \
    ...     (PeerUUID('233ad9c2-268f-4506-ab0f-4c71461c5d88'),
    ...      PeerUUID('e96a073b-3cd0-49a6-b14a-1fb04c221a9c'))

    >>> # No optional arguments.
    >>> ReceiveChunksTransactionState_Node(
    ...     tr_start_time=tr_start_time,
    ...     tr_uuid=tr_uuid,
    ...     tr_src_uuid=tr_src_uuid,
    ...     tr_dst_uuid=tr_dst_uuid
    ... ) # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE
    ReceiveCh...State_Node(tr_start_time=datetime.datetime(2012, 9, 26, 14,
                                                           29, 48, 877434),
        tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
        tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
        tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d'))

    >>> # All optional arguments.
    >>> ReceiveChunksTransactionState_Node(
    ...     chunks_to_replicate={
    ...         host_uuid_1:
    ...             [ChunkInfo(crc32=0x2A5FE875, uuid=u4,
    ...                        maxsize_code=1,
    ...                        hash='abcdabcd' * 8, size=73819)],
    ...         host_uuid_2:
    ...             [ChunkInfo(crc32=0x07FD7A5B, uuid=u1,
    ...                        maxsize_code=1,
    ...                        hash='abcdefgh' * 8, size=2097152),
    ...              ChunkInfo(crc32=0x7E5CE7AD, uuid=u2,
    ...                        maxsize_code=0,
    ...                        hash='01234567' * 8, size=143941),
    ...              ChunkInfo(crc32=0xDCC847D8, uuid=u3,
    ...                        maxsize_code=1,
    ...                        hash='76543210' * 8, size=2097151)]
    ...     },
    ...     chunks_to_restore={
    ...         host_uuid_1:
    ...             [ChunkInfo(crc32=0x07FD7A5B, uuid=u1,
    ...                        maxsize_code=1,
    ...                        hash='abcdefgh' * 8, size=2097152),
    ...              ChunkInfo(crc32=0x7E5CE7AD, uuid=u2,
    ...                        maxsize_code=0,
    ...                        hash='01234567' * 8, size=143941),
    ...              ChunkInfo(crc32=0xDCC847D8, uuid=u3,
    ...                        maxsize_code=1,
    ...                        hash='76543210' * 8, size=2097151)],
    ...         host_uuid_2:
    ...             [ChunkInfo(crc32=0x2A5FE875, uuid=u1,
    ...                        maxsize_code=1,
    ...                        hash='abcdabcd' * 8, size=73819)]
    ...     },
    ...     tr_start_time=tr_start_time,
    ...     tr_uuid=tr_uuid,
    ...     tr_src_uuid=tr_src_uuid,
    ...     tr_dst_uuid=tr_dst_uuid
    ... ) # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE
    ReceiveCh...State_Node(tr_start_time=datetime.datetime(2012, 9, 26, 14,
                                                           29, 48, 877434),
        tr_uuid=UUID('1a82a181-741d-4a64-86e5-77a7dd000ba2'),
        tr_src_uuid=UUID('fa87ebfd-d498-4ba6-9f04-a933e4512b24'),
        tr_dst_uuid=UUID('e6aa4157-ee8a-449e-a2d5-3340a59e717d'),
        chunks_to_replicate={PeerUUID('e96a073b-...fb04c221a9c'):
                [ChunkInfo(uuid=ChunkUUID('5b237ceb-300d...-6331cb14b5b4'),
                           maxsize_code=1,
                           hash=unhexlify('6162636465...6162636465666768'),
                           size=2097152, crc32=0x07FD7A5B),
                 ChunkInfo(uuid=ChunkUUID('940f0711-52d7...-818580f432dc'),
                           maxsize_code=0,
                           hash=unhexlify('3031323334...3031323334353637'),
                           size=143941, crc32=0x7E5CE7AD),
                 ChunkInfo(uuid=ChunkUUID('a5b605f2-6ea5...-d217b7e8e784'),
                           maxsize_code=1,
                           hash=unhexlify('3736353433...3736353433323130'),
                           size=2097151, crc32=0xDCC847D8)],
            PeerUUID('233ad9c2-268f-4506-ab0f-4c71461c5d88'):
                [ChunkInfo(uuid=ChunkUUID('0a7064b3-bef6...-e9f9a40dfcf3'),
                           maxsize_code=1,
                           hash=unhexlify('6162636461...6162636461626364'),
                           size=73819, crc32=0x2A5FE875)]},
        chunks_to_restore={PeerUUID('e96a073b-...fb04c221a9c'):
                [ChunkInfo(uuid=ChunkUUID('5b237ceb-300d...-6331cb14b5b4'),
                           maxsize_code=1,
                           hash=unhexlify('6162636461...6162636461626364'),
                           size=73819, crc32=0x2A5FE875)],
            PeerUUID('233ad9c2-268f-4506-ab0f-4c71461c5d88'):
                [ChunkInfo(uuid=ChunkUUID('5b237ceb-300d...-6331cb14b5b4'),
                           maxsize_code=1,
                           hash=unhexlify('6162636465...6162636465666768'),
                           size=2097152, crc32=0x07FD7A5B),
                 ChunkInfo(uuid=ChunkUUID('940f0711-52d7...-818580f432dc'),
                           maxsize_code=0,
                           hash=unhexlify('3031323334...3031323334353637'),
                           size=143941, crc32=0x7E5CE7AD),
                 ChunkInfo(uuid=ChunkUUID('a5b605f2-6ea5...-d217b7e8e784'),
                           maxsize_code=1,
                           hash=unhexlify('3736353433...3736353433323130'),
                           size=2097151, crc32=0xDCC847D8)]})

    Both mappings are copied into new C{dict} objects; C{None} yields
    an empty C{dict}. In debug mode it is asserted that every key
    is a C{PeerUUID} and every value is a C{list} of C{Chunk} objects.

    @type chunks_to_replicate: NoneType, col.Mapping
    @type chunks_to_restore: NoneType, col.Mapping
    """
    super(ReceiveChunksTransactionState_Node, self).__init__(*args,
                                                             **kwargs)

    self.chunks_to_replicate = dict(coalesce(chunks_to_replicate, {}))
    self.chunks_to_restore = dict(coalesce(chunks_to_restore, {}))

    # Sanity checks of both mappings; debug mode only.
    if __debug__:
        for per_host in (self.chunks_to_replicate,
                         self.chunks_to_restore):
            # The keys are the peer UUIDs...
            assert consists_of(per_host.iterkeys(), PeerUUID), \
                   repr(per_host.keys())
            # ... the values are the lists...
            assert consists_of(per_host.itervalues(), list), \
                   repr(per_host.values())
            # ... and every list contains only the chunks.
            for chunks_of_host in per_host.itervalues():
                assert consists_of(chunks_of_host, Chunk), \
                       repr(per_host.values())

    assert self.is_valid_bsonable(), repr(self)
def __init__(self, code, username_uc, ds_uuid, basedirectory_path,
             rel_path,
             active=True, blocked=False, viewsN=0, downloadsN=0,
             created_ts=None, last_view_ts=None, last_download_ts=None,
             mime=None,
             *args, **kwargs):
    r"""Constructor.

    >>> # No optional arguments
    >>> MagnetLink(code='jDd6r',
    ...            username_uc='*****@*****.**',
    ...            ds_uuid=UUID('766796e9-6f08-456d-806b-a24bc01f180c'),
    ...            basedirectory_path='C:/Windows/System32/drivers',
    ...            rel_path='etc/hosts',
    ... ) # doctest:+NORMALIZE_WHITESPACE +ELLIPSIS
    MagnetLink(_id=None,
        username_uc="*****@*****.**",
        ds_uuid=UUID('766796e9-6f08-456d-806b-a24bc01f180c'),
        basedirectory_path="C:/Windows/System32/drivers",
        rel_path="etc/hosts",
        active=True,
        viewsN=0,
        downloadsN=0,
        created_ts=datetime.datetime(...))

    >>> # All optional arguments
    >>> MagnetLink(
    ...     code='KzFp3',
    ...     username_uc='*****@*****.**',
    ...     ds_uuid=UUID('ac349f0e-3c16-7832-acfe-c4357ac18c57'),
    ...     basedirectory_path='/home/username/my_folder',
    ...     rel_path='photos/avatar.png',
    ...     active=False,
    ...     viewsN=3,
    ...     downloadsN=1,
    ...     created_ts=datetime(2012, 1, 1, 1, 1, 1, 111111),
    ...     blocked=True,
    ...     last_view_ts=datetime(2012, 2, 2, 2, 2, 2, 222222),
    ...     last_download_ts=datetime(2012, 3, 3, 3, 3, 3, 333333),
    ...     mime="test/mimetype"
    ... ) # doctest:+NORMALIZE_WHITESPACE
    MagnetLink(_id=None,
        username_uc="*****@*****.**",
        ds_uuid=UUID('ac349f0e-3c16-7832-acfe-c4357ac18c57'),
        basedirectory_path="/home/username/my_folder",
        rel_path="photos/avatar.png",
        active=False,
        viewsN=3,
        downloadsN=1,
        created_ts=datetime.datetime(2012, 1, 1, 1, 1, 1, 111111),
        blocked=True,
        last_view_ts=datetime.datetime(2012, 2, 2, 2, 2, 2, 222222),
        last_download_ts=datetime.datetime(2012, 3, 3, 3, 3, 3, 333333),
        mime="test/mimetype")

    Any extra positional/keyword arguments are forwarded
    to the parent constructor.

    @param code: the code of the magnet link.
    @type code: basestring

    @param username_uc: the name of the user.
    @type username_uc: basestring

    @param ds_uuid: the UUID of the Host.
        NOTE(review): the argument name suggests a dataset UUID
        rather than a host UUID - confirm.
    @type ds_uuid: UUID

    @param basedirectory_path: the path of the base directory;
        it is used to find the unique file.
    @type basedirectory_path: basestring

    @param rel_path: the relative path of the file.
    @type rel_path: basestring

    @param active: whether the magnet link is active.
    @type active: bool

    @param viewsN: the number of views of this magnet link.
    @type viewsN: numbers.Integral

    @param downloadsN: the number of downloads of the file
        via this magnet link.
    @type downloadsN: numbers.Integral

    @param created_ts: the creation time; if C{None},
        C{datetime.utcnow()} is used.
    @type created_ts: datetime, NoneType

    @param blocked: whether the magnet link is blocked
        by the administrator.
    @type blocked: bool

    @type last_view_ts: datetime, NoneType
    @type last_download_ts: datetime, NoneType

    @param mime: the mimetype of the file; if C{None}, the first item
        of C{guess_type(rel_path)} is used.
    @type mime: basestring, NoneType
    """
    super(MagnetLink, self).__init__(*args, **kwargs)

    # What the link is and which file it points at.
    self.code = code
    self.username_uc = username_uc
    self.ds_uuid = ds_uuid
    self.basedirectory_path = basedirectory_path
    self.rel_path = rel_path

    # State and counters; the caller may override the defaults.
    self.active = active
    self.viewsN = viewsN
    self.downloadsN = downloadsN
    # The current time is computed even if created_ts is given.
    current_ts = datetime.utcnow()
    self.created_ts = coalesce(created_ts, current_ts)

    # Optional fields
    self.blocked = blocked
    self.last_view_ts = last_view_ts
    self.last_download_ts = last_download_ts
    # The guess is made even if an explicit mime is given.
    guessed_mime = guess_type(rel_path)[0]
    self.mime = coalesce(mime, guessed_mime)
def __restore_files(self):
    """Internal procedure which actually restores the files.

    Reads the dataset, the user group and the files to restore from
    C{self.message}. If the restore directory is known, it is created
    when missing and every file of the message is passed
    to C{self.__restore_op_for_path()}. For a sync message
    (C{_message.sync} is true) the dataset is additionally registered
    in the local DB before the files are restored, and marked
    as completed afterwards.

    If the restore directory is unknown, an error is logged instead.

    @todo: the Fingerprint calculation should be turned into
        "file is read by blocks and then repacked into 16KiB segments";
        then recalculation of the fingerprint in case of FP mismatch
        won't be needed.
    """
    # NOTE(review): the nesting of the statements below was reconstructed
    # from a copy of the source that had lost its indentation;
    # verify it against the repository version.
    _message = self.message
    my_host = self.manager.app.host
    feature_set = self.manager.app.feature_set
    ds = _message.dataset
    wr_uuid = _message.wr_uuid
    ugroup = _message.ugroup

    restore_directory = self.__get_restore_directory()
    # These three conditions must hold or fail together: the message
    # is a sync one, it has no wr_uuid, its dataset has the UUID.
    assert _message.sync == (wr_uuid is None) == (ds.uuid is not None), \
           (_message.sync, wr_uuid, ds)

    # Stays None unless the whole dataset is being synced in.
    base_dir_id = None  # will be used later

    if restore_directory is None:
        logger.error('Do not know the restore directory')
    else:
        logger.debug('Going to restore dataset %r for %r to %r',
                     ds, ugroup, restore_directory)
        if not os.path.exists(restore_directory):
            os.makedirs(restore_directory)

        # The group key is used only if the per-group encryption
        # feature is enabled.
        group_key = ugroup.enc_key if feature_set.per_group_encryption \
                                   else None
        cryptographer = Cryptographer(group_key=group_key,
                                      key_generator=None)

        is_whole_dataset_restored = _message.sync

        logger.debug('Restoring %s files from dataset: %r',
                     'all' if is_whole_dataset_restored else 'selected',
                     coalesce(ds, 'N/A'))

        # TODO: use the "delete time" from the LocalPhysicalFileState!
        _now = datetime.utcnow()

        # If we are syncing-in the whole dataset, we should write it
        # into the DB as a whole. The files/file_locals will be bound to it
        # so that after restore, we'll know on this Host that these states
        # are fully synced to the cloud already (in fact, they came
        # from the cloud).
        if _message.sync:
            # Let's hack into the files and substitute the base_dir.
            # TODO: do it better!
            for f in _message.files.iterkeys():
                f.base_dir = restore_directory

            # Write the whole dataset to the DB
            _small_files = _message.files.keys()  # not iterkeys() for now!
            _dirs = {restore_directory: (_small_files, [])}

            # Given the information in the inbound message about
            # the whole dataset, store this dataset in the DB.
            dataset = DatasetWithDirectories(
                          name=ds.name,
                          sync=ds.sync,
                          directories=_dirs,
                          # TODO: transport real data
                          # from the node
                          uuid=DatasetUUID.safe_cast_uuid(ds.uuid),
                          ugroup_uuid=UserGroupUUID.safe_cast_uuid(ugroup.uuid),
                          time_started=ds.time_started,
                          time_completed=_now)

            with db.RDB() as rdbw:
                # Do we already have the dataset?
                _ds_in_progress = \
                    HostQueries.HostDatasets.get_my_ds_in_progress(
                        host_uuid=my_host.uuid,
                        ds_uuid=dataset.uuid,
                        rdbw=rdbw)

                if _ds_in_progress is None:
                    # We don't have it, insert.
                    dummy_ds_uuid = \
                        HostQueries.HostDatasets.create_dataset_for_backup(
                            my_host.uuid, dataset, rdbw)
                    assert dummy_ds_uuid == dataset.uuid, \
                           (dummy_ds_uuid, dataset.uuid)

                # NOTE(review): presumably done whether or not the dataset
                # was just inserted; confirm the nesting.
                base_dir_id = \
                    HostQueries.HostFiles.add_or_get_base_directory(
                        restore_directory, ugroup.uuid, rdbw)

        # NOTE(review): this flag is never set to True anywhere in this
        # method (the result of __restore_op_for_path() is not used),
        # so the 'some issues' branches below cannot be taken as written.
        error_in_any_file_occured = False

        #
        # Finally, loop over the files and restore each one
        #
        for file_, file_blocks in _message.files.iteritems():
            self.__restore_op_for_path(file_, file_blocks,
                                       is_whole_dataset_restored,
                                       base_dir_id,
                                       restore_directory,
                                       cryptographer,
                                       ds)

        # Loop over the files completed
        if is_whole_dataset_restored:
            logger.debug('Restoring %r completed, there were %s issues.',
                         ds,
                         'some' if error_in_any_file_occured else 'no')
            if not error_in_any_file_occured:
                with db.RDB() as rdbw:
                    logger.debug('Updating %r at host %s...',
                                 ds, my_host.uuid)
                    ds_to_finish = \
                        Queries.Datasets.get_dataset_by_uuid(ds.uuid,
                                                             my_host.uuid,
                                                             rdbw)
                    ds_to_finish.time_completed = datetime.utcnow()
                    # "dataset" is bound here: it was created above
                    # under the same _message.sync condition.
                    logger.debug('Updating %r as completed', dataset)

                    # Mark the current dataset as completed
                    # only after the response from the node is received.
                    Queries.Datasets.update_dataset(
                        my_host.uuid, ds_to_finish, rdbw)

        # Everything seems ok to this moment
        # NOTE(review): presumably reported only when the restore
        # directory was known; confirm the nesting.
        with self.open_state(for_update=True) as state:
            state.ack_result_code = RestoreMessage.ResultCodes.OK