def load(class_, harmony_directory):
    """
    Load an existing repository from disk.

    @return Repository instance for repo in working_directory.
    """
    harmony_directory = Path(harmony_directory)
    working_directory = class_.find_working_directory_here(
        harmony_directory)

    repo = class_(working_directory, harmony_directory)

    # TODO: implement a Configuration class so we can use
    # Configuration.load(...) here
    config = serialization.read(
        harmony_directory / Repository.REPOSITORY_FILE)
    repo.id = config['id']
    repo.name = config['name']

    def component(cls):
        # Each component type knows its own storage location below the
        # harmony directory and how to deserialize itself from there.
        return cls.load(cls.get_path(repo.harmony_directory))

    repo.location_states = component(LocationStates)
    repo.repository_state = component(RepositoryState)
    repo.ruleset = component(Ruleset)
    repo.remotes = component(Remotes)
    repo.working_directory = WorkingDirectory(working_directory,
                                              repo.ruleset)

    logging.info('Loaded repository')
    logging.info(' ID : {} ({})'.format(shortened_id(repo.id), repo.id))
    logging.info(' Name: {}'.format(repo.name))
    logging.info(' WD : {}'.format(repo.working_directory.path))
    return repo
def fetch(self, remote_spec):
    """
    Fetch location and repository state from the remote denoted by
    remote_spec.

    Updates (and saves) the local location-state store with the remote's
    knowledge; the remote repository state is returned but NOT merged
    here.

    @param remote_spec Name, ID or location of a configured remote.
    @return RepositoryState instance as loaded from the remote.
    """
    location = self.remotes.get_location_any(remote_spec)

    logger.debug('{} fetching from {} which is at {} to {}'.format(
        self.short_id, remote_spec, location, self.harmony_directory))

    location_states_path = LocationStates.get_path(self.HARMONY_SUBDIR)
    repository_state_path = RepositoryState.get_path(self.HARMONY_SUBDIR)

    with protocols.connect(location) as connection:
        files = connection.pull_harmony_files(
            [location_states_path, repository_state_path])
        remote_location_states = LocationStates.load(
            files[location_states_path])
        repository_state = RepositoryState.load(
            files[repository_state_path])

    logger.debug('{} fetched remote state:'.format(self.short_id))
    # BUGFIX: the loop variable used to be called "location", shadowing
    # the remote location resolved above; use a distinct name.
    for lid, loc_state in remote_location_states.items.items():
        logger.debug(' {}:'.format(shortened_id(lid)))
        for f in loc_state.files.values():
            logger.debug(' {}'.format(f.__dict__))

    self.location_states.update(remote_location_states)
    self.location_states.save()

    return repository_state
def short_id(self):
    """
    Provide a shortened version of the ID for easier recognizability
    in log messages. Much more likely to collide for many repos, but
    incredibly useful for quickly understanding logs of unittests.
    """
    abbreviated = shortened_id(self.id)
    return abbreviated
def update(self, other):
    """
    Merge the location states from ``other`` into this instance.

    For every location ID in ``other``, the remote entry replaces the
    local one iff the ID is unknown locally or the local entry carries a
    strictly lower clock value; otherwise the local entry is kept.
    """
    logger.debug('location_states update')
    for id_, d in other.items.items():
        assert isinstance(d, LocationState)
        # Last-writer-wins by logical clock: adopt the remote state when
        # we have none for this location or ours is strictly older.
        if id_ not in self.items or self.items[id_].clock < d.clock:
            logger.debug(
                'overwriting state for {} (={}) with remote'.format(
                    shortened_id(id_), id_))
            # deepcopy so later mutation of `other` cannot alias into us
            self.items[id_] = deepcopy(d)
        else:
            logger.debug('keeping state for {}'.format(id_))
            # Log both sides' clocks/files to explain why we kept ours.
            logger.debug(' clock local: {} t={}'.format(
                self.items[id_].clock, self.items[id_].last_modification))
            logger.debug(' {}'.format(self.items[id_].files))
            logger.debug(' clock remote: {} t={}'.format(
                d.clock, d.last_modification))
            logger.debug(' {}'.format(d.files))
def init(class_, working_directory, name=None):
    """
    Create fresh repository in given working dir.

    @param working_directory directory to turn into a repository.
    @param name optional human-readable name; generated from the
        working directory when omitted.
    @return Repository instance for created repo.
    """
    harmony_directory = class_.find_harmony_directory_here(
        working_directory)
    working_directory = class_.find_working_directory_here(
        working_directory)

    if name is None:
        name = class_.generate_name(working_directory)

    harmony_directory.mkdir()

    repo = Repository(working_directory, harmony_directory)

    def fresh(cls):
        # Initialize an empty component at its canonical path below the
        # harmony directory.
        return cls.init(cls.get_path(repo.harmony_directory))

    repo.location_states = fresh(LocationStates)
    repo.repository_state = fresh(RepositoryState)
    repo.ruleset = fresh(Ruleset)
    repo.remotes = fresh(Remotes)
    repo.working_directory = WorkingDirectory(working_directory,
                                              repo.ruleset)
    repo.id = uuid.uuid1().hex
    repo.name = name

    logging.info('Initialized repository')
    logging.info(' ID : {} ({})'.format(shortened_id(repo.id), repo.id))
    logging.info(' Name: {}'.format(repo.name))
    logging.info(' WD : {}'.format(repo.working_directory.path))

    repo.save()
    return repo
def commit(local_location_id, working_directory, location_states,
           repository_state):
    """
    Scan the given working directory for changes and commit them to
    local state storage.
    That is, update location_states[local_location_id] with the new
    current file states (digests, "who has what?").
    Also update repository_state info ("who made which content decision
    in what order?")

    Parameters:

    local_location_id:
        ID of the location that is considered local (i.e. the one that
        belongs to the working_directory instance)

    working_directory:
        WorkingDirectory instance representing the local working
        directory.

    location_states:
        LocationStates instance representing the local location state
        storage. Will (possibly) be modified.

    repository_state:
        RepositoryState instance representing the local repository
        state storage. Will (possibly) be modified.

    return:
        True iff any change was recorded.
    """
    id_ = local_location_id
    short_id = shortened_id(id_)

    # Union of files currently present and files previously recorded, so
    # removed files are visited as well.
    paths = set(working_directory.get_filenames()) \
        | set(location_states.get_all_paths(id_))

    # 1. update location state
    #    - detect renames (add WIPE entries later for those)
    #    - when a file is *added* that is known to other locations w/
    #      different digest, let user confirm what he wants to do (see
    #      above)
    #    - increase local clock
    #
    # 2. update repository state
    #    - if file changed in step 1:
    #      clock = current clock for local + max for each other location
    #      hash = current local hash
    #      (deviate from this if user selected to do something else)
    #    - if file did not change:
    #      no change in hash or clock

    # Do all the file scanning before so we can be sure to do it at most
    # once per file in the WD
    wd_states = {
        path: working_directory.generate_file_state(path)
        for path in paths
        if working_directory.file_maybe_modified(
            location_states.get_file_state(id_, path)
        )
    }
    location_state_cache = {
        path: location_states.get_file_state(id_, path)
        for path in paths
    }

    any_change = False
    for path in paths:
        if path in wd_states:
            # file_state: previously recorded state; new_file_state: the
            # freshly scanned one.
            file_state = location_state_cache[path]
            new_file_state = wd_states[path]
            changed = location_states.update_file_state(id_, new_file_state)
            if changed:
                any_change = True

                # If the file vanished but a new one with the same digest
                # popped up, consider that a rename.
                # Rename means, the old file is WIPEd (instead of just
                # locally removed) and the new file is added as usual
                if not new_file_state.exists():
                    logger.debug('{} vanished'.format(new_file_state.path))

                    # Iterate over paths to find a possible rename target
                    for path2 in paths:
                        # Rename to itself does not make sense
                        # Rename to a file that has not changed (or
                        # better: just appeared) does not make sense
                        if path2 == path or path2 not in wd_states:
                            continue

                        path2_state = location_state_cache[path2]
                        new_path2_state = wd_states[path2]

                        logger.debug('{} rename candidate {} ex before={} ex now={} self.digest={} candidate.digest={}'.format(
                            path, path2,
                            path2_state.exists(), new_path2_state.exists(),
                            file_state.digest, new_path2_state.digest
                        ))

                        # Target qualifies if it just appeared with the
                        # digest the vanished file used to have.
                        if not path2_state.exists() \
                                and new_path2_state.exists() \
                                and new_path2_state.digest == file_state.digest:
                            logger.info('Detected rename: {} -> {}'.format(path, path2))
                            new_file_state.wipe = True
                            new_file_state.digest = file_state.digest
                            break

                repository_state.update_file_state(
                    new_file_state,
                    id_,
                    location_states.get_clock(id_) + 1,
                )
                logger.debug('{} committed: {} clk={}'.format(short_id, new_file_state.path, location_states.get_clock(id_) + 1))
            else:
                logger.debug('{} not actually changed: {}'.format(short_id, path))
        else:
            logger.debug('{} not changed: {}'.format(short_id, path))

    return any_change