def migrate_from_0():
    """Migrate a version-0 datastore layout ("store" dir) to version 1."""
    logging.info("Migrating datastore from version 0 to version 1")
    root_path = layoutmanager.get_instance().get_root_path()
    old_root_path = os.path.join(root_path, "store")
    if not os.path.exists(old_root_path):
        return
    for entry in os.listdir(old_root_path):
        uid, ext = os.path.splitext(entry)
        if ext != ".metadata":
            continue
        logging.debug("Migrating entry %r", uid)
        new_entry_dir = layoutmanager.get_instance().get_metadata_path(uid)
        if not os.path.exists(new_entry_dir):
            os.makedirs(new_entry_dir)
        try:
            _migrate_metadata(root_path, old_root_path, uid)
            _migrate_file(root_path, old_root_path, uid)
            _migrate_preview(root_path, old_root_path, uid)
        except Exception:
            logging.exception("Error while migrating entry %r", uid)
    # Just be paranoid, it's cheap.
    if old_root_path.endswith("datastore/store"):
        shutil.rmtree(old_root_path)
    logging.info("Migration finished")
def migrate_from_0():
    """Upgrade the on-disk datastore from layout version 0 to version 1."""
    logging.info('Migrating datastore from version 0 to version 1')
    root_path = layoutmanager.get_instance().get_root_path()
    old_root_path = os.path.join(root_path, 'store')
    if not os.path.exists(old_root_path):
        return
    for filename in os.listdir(old_root_path):
        uid, ext = os.path.splitext(filename)
        if ext != '.metadata':
            continue
        logging.debug('Migrating entry %r', uid)
        new_entry_dir = layoutmanager.get_instance().get_metadata_path(uid)
        if not os.path.exists(new_entry_dir):
            os.makedirs(new_entry_dir)
        try:
            # Move metadata, the data file and the preview for this entry.
            _migrate_metadata(root_path, old_root_path, uid)
            _migrate_file(root_path, old_root_path, uid)
            _migrate_preview(root_path, old_root_path, uid)
        except Exception:
            logging.exception('Error while migrating entry %r', uid)
    # Just be paranoid, it's cheap.
    if old_root_path.endswith('datastore/store'):
        shutil.rmtree(old_root_path)
    logging.info('Migration finished')
def __init__(self):
    """Set up index bookkeeping; the index itself is opened lazily."""
    self._database = None
    self._flush_timeout = None
    self._pending_writes = 0
    root_path = layoutmanager.get_instance().get_root_path()
    # Flag file recording that the on-disk index is up to date.
    self._index_updated_path = os.path.join(root_path, 'index_updated')
    self._std_index_path = layoutmanager.get_instance().get_index_path()
    self._index_path = self._std_index_path
def __init__(self):
    """Initialize index state; no database handle is opened here."""
    self._database = None
    self._flush_timeout = None
    self._pending_writes = 0
    layout = layoutmanager.get_instance()
    # Marker file that says the persisted index is current.
    self._index_updated_path = os.path.join(layout.get_root_path(),
                                            'index_updated')
    self._std_index_path = layout.get_index_path()
    self._index_path = self._std_index_path
def hard_link_entry(self, new_uid, existing_uid):
    """Replace new_uid's data file with a hard link to existing_uid's file."""
    existing_file = layoutmanager.get_instance().get_data_path(existing_uid)
    new_file = layoutmanager.get_instance().get_data_path(new_uid)
    logging.debug('removing %r', new_file)
    os.remove(new_file)
    logging.debug('hard linking %r -> %r', new_file, existing_file)
    os.link(existing_file, new_file)
def store(self, uid, file_path, transfer_ownership, completion_cb):
    """Store a file for a given entry.

    Moves the file when ownership is transferred (copying across
    filesystems), otherwise copies it asynchronously; calls
    completion_cb when the data is in place.
    """
    dir_path = layoutmanager.get_instance().get_entry_path(uid)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    destination_path = layoutmanager.get_instance().get_data_path(uid)
    if file_path:
        if not os.path.isfile(file_path):
            raise ValueError('No file at %r' % file_path)
        if os.path.islink(file_path):
            # Can't keep symlinks (especially pointed to removable medias).
            # Later, optimizer will help with saving duplicates
            file_path = os.path.realpath(file_path)
            # We should not move original file
            transfer_ownership = False
        if transfer_ownership:
            try:
                logger.debug('FileStore moving from %r to %r', file_path,
                             destination_path)
                os.rename(file_path, destination_path)
                completion_cb()
            except OSError as e:
                # Cross-device rename is impossible; copy then unlink.
                if e.errno == errno.EXDEV:
                    self._async_copy(file_path, destination_path,
                                     completion_cb, unlink_src=True)
                else:
                    raise
        else:
            self._async_copy(file_path, destination_path, completion_cb,
                             unlink_src=False)
    # TODO: How can we support deleting the file of an entry?
    # elif not file_path and os.path.exists(destination_path):
    #     logger.debug('FileStore: deleting %r' % destination_path)
    #     os.remove(destination_path)
    #     completion_cb()
    else:
        logger.debug('FileStore: Nothing to do')
        completion_cb()
def find(self, query, properties):
    """Query the index; fall back to a full disk scan while the index is
    rebuilding or when an inconsistency is detected."""
    logging.debug('datastore.find %r', query)
    t = time.time()
    if not self._index_updating:
        try:
            uids, count = self._index_store.find(query)
            uids = [uid.decode() for uid in uids]
        except Exception:
            logging.exception('Failed to query index, will rebuild')
            self._rebuild_index()
    # NOTE(review): if the rebuild above ever finished synchronously,
    # 'uids' would be unbound below — this relies on _rebuild_index
    # leaving _index_updating True; confirm.
    if self._index_updating:
        logging.warning('Index updating, returning all entries')
        return self._find_all(query, properties)
    entries = []
    for uid in uids:
        entry_path = layoutmanager.get_instance().get_entry_path(uid)
        if not os.path.exists(entry_path):
            logging.warning('Inconsistency detected, returning all entries')
            self._rebuild_index()
            return self._find_all(query, properties)
        metadata = self._metadata_store.retrieve(uid, properties)
        self._fill_internal_props(metadata, uid, properties)
        entries.append(metadata)
    logger.debug('find(): %r', time.time() - t)
    return entries, count
def store(self, uid, metadata):
    ''' Create or update an entry to the journal '''
    # Name of the entry.
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    # Fetch the entity when it exists, otherwise create it and remember
    # that the activity usage stats need updating.
    if self._ers.entity_exist(entity_name):
        entity = self._ers.get_entity(entity_name, local=True)
        update_stats = False
    else:
        entity = self._ers.create_entity(entity_name)
        update_stats = True
    # A brand new entry bumps the usage stats of its activity.
    if update_stats and 'activity' in metadata:
        stats_entity_name = 'urn:ers:app:{}:activityStats'.format(
            metadata['activity'])
        if self._ers.entity_exist(stats_entity_name):
            stats_entity = self._ers.get_entity(stats_entity_name,
                                                local=True)
        else:
            stats_entity = self._ers.create_entity(stats_entity_name)
            stats_entity.set_property_value('activity',
                                            metadata['activity'])
        stats_entity.add_property_value('usage', entity_name)
        self._ers.persist_entity(stats_entity)
    # Update the description of the entity.
    metadata['uid'] = str(uid)
    for key, value in metadata.items():
        entity.set_property_value(key, value, private=True)
    # Persist the result.
    self._ers.persist_entity(entity)
def get_property(self, uid, key):
    """Return the raw stored value of property *key* for entry *uid*.

    Returns None when the property file does not exist.
    """
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    property_path = os.path.join(metadata_path, key)
    if not os.path.exists(property_path):
        return None
    # Fix: the previous code leaked the file handle; close it
    # deterministically with a context manager.
    with open(property_path, 'r') as f:
        return f.read()
def delete(self, uid):
    """Remove the file associated with a given entry, if one exists."""
    file_path = layoutmanager.get_instance().get_data_path(uid)
    if os.path.exists(file_path):
        os.remove(file_path)
def _set_property(self, uid, key, value, md_path=False):
    """Write one metadata property to its own file, replacing atomically.

    Skips the write entirely when the stored value is unchanged.
    (Python 2 code path: uses unicode/basestring.)
    """
    if not md_path:
        md_path = layoutmanager.get_instance().get_metadata_path(uid)
    # Hack to support activities that still pass properties named as
    # for example title:text.
    if ':' in key:
        key = key.split(':', 1)[0]
    fpath = os.path.join(md_path, key)
    tpath = os.path.join(md_path, '.' + key)
    # FIXME: this codepath handles raw image data; str() is 8-bit clean
    # right now, but this won't last.  We will need more explicit
    # handling of strings, int/floats vs raw data.
    if isinstance(value, unicode):
        value = value.encode('utf-8')
    elif not isinstance(value, basestring):
        value = str(value)
    # Avoid pointless writes; replace atomically via rename.
    changed = True
    if os.path.exists(fpath):
        if open(fpath, 'r').read() == value:
            changed = False
    if changed:
        f = open(tpath, 'w')
        f.write(value)
        f.close()
        os.rename(tpath, fpath)
def _migrate_metadata(root_path, old_root_path, uid):
    """Split an entry's old JSON metadata into one file per property.

    (Python 2 code path: uses unicode/basestring.)
    """
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    old_metadata_path = os.path.join(old_root_path, uid + '.metadata')
    metadata = json.load(open(old_metadata_path, 'r'))
    if 'uid' not in metadata:
        metadata['uid'] = uid
    # Synthesize a timestamp from the legacy mtime string when needed.
    if 'timestamp' not in metadata and 'mtime' in metadata:
        metadata['timestamp'] = \
            time.mktime(time.strptime(metadata['mtime'], DATE_FORMAT))
    for key, value in metadata.items():
        try:
            f = open(os.path.join(metadata_path, key), 'w')
            try:
                if isinstance(value, unicode):
                    value = value.encode('utf-8')
                if not isinstance(value, basestring):
                    value = str(value)
                f.write(value)
            finally:
                f.close()
        except Exception:
            logging.exception(
                'Error while migrating property %s of entry %s', key, uid)
def find(self, query, properties):
    """Answer a datastore query from the index, falling back to a full
    scan while it rebuilds or if an indexed entry is missing on disk."""
    logging.debug('datastore.find %r', query)
    started = time.time()
    if not self._index_updating:
        try:
            uids, count = self._index_store.find(query)
        except Exception:
            logging.exception('Failed to query index, will rebuild')
            self._rebuild_index()
    if self._index_updating:
        logging.warning('Index updating, returning all entries')
        return self._find_all(query, properties)
    entries = []
    for uid in uids:
        entry_path = layoutmanager.get_instance().get_entry_path(uid)
        if not os.path.exists(entry_path):
            logging.warning('Inconsistency detected, returning all entries')
            self._rebuild_index()
            return self._find_all(query, properties)
        metadata = self._metadata_store.retrieve(uid, properties)
        self._fill_internal_props(metadata, uid, properties)
        entries.append(metadata)
    logger.debug('find(): %r', time.time() - started)
    return entries, count
def _identical_file_already_exists(self, checksum):
    """Check if we already track files with this checksum."""
    checksums_dir = layoutmanager.get_instance().get_checksums_dir()
    return os.path.exists(os.path.join(checksums_dir, checksum))
def _migrate_metadata(root_path, old_root_path, uid):
    """Split an entry's old JSON metadata into one file per property.

    Properties that fail to write are logged and skipped so one bad
    value cannot abort the whole entry's migration.
    """
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    old_metadata_path = os.path.join(old_root_path, uid + '.metadata')
    with open(old_metadata_path, 'r') as f:
        metadata = json.load(f)
    if 'uid' not in metadata:
        metadata['uid'] = uid
    # Synthesize a timestamp from the legacy mtime string when needed.
    if 'timestamp' not in metadata and 'mtime' in metadata:
        metadata['timestamp'] = \
            time.mktime(time.strptime(metadata['mtime'], DATE_FORMAT))
    for key, value in list(metadata.items()):
        try:
            # Bug fix: the old code first encoded str values to bytes and
            # then str()-ified them again, so the literal text "b'...'"
            # (the bytes repr) ended up on disk.  Text-mode files take
            # str directly; everything else goes through str().
            if not isinstance(value, str):
                value = str(value)
            with open(os.path.join(metadata_path, key), 'w') as prop_file:
                prop_file.write(value)
        except Exception:
            logging.exception(
                'Error while migrating property %s of entry %s', key, uid)
def _get_uid_from_checksum(self, checksum):
    """Return the uid of an existing entry whose file matches checksum."""
    checksum_path = os.path.join(
        layoutmanager.get_instance().get_checksums_dir(), checksum)
    # Assumes the caller already verified a matching entry exists.
    return os.listdir(checksum_path)[0]
def _create_checksum_dir(self, checksum):
    """Create the directory that tracks files sharing this checksum."""
    checksum_path = os.path.join(
        layoutmanager.get_instance().get_checksums_dir(), checksum)
    logging.debug('create dir %r', checksum_path)
    os.mkdir(checksum_path)
def _already_linked(self, uid, checksum):
    """Check whether this entry's file is already hard linked into the
    checksums dir."""
    checksums_dir = layoutmanager.get_instance().get_checksums_dir()
    marker = os.path.join(checksums_dir, checksum, uid)
    return os.path.exists(marker)
def _update_index(self):
    """Find entries that are not yet in the index and add them."""
    uids = layoutmanager.get_instance().find_all()
    logging.debug('Going to update the index with object_ids %r', uids)
    self._index_updating = True
    # Index incrementally from the main loop, at low priority.
    GObject.idle_add(lambda: self.__update_index_cb(uids),
                     priority=GObject.PRIORITY_LOW)
def _update_index(self):
    """Find entries that are not yet in the index and add them."""
    uids = layoutmanager.get_instance().find_all()
    logging.debug('Going to update the index with object_ids %r', uids)
    self._index_updating = True
    # Drain the work from the GLib main loop at low priority.
    GLib.idle_add(lambda: self.__update_index_cb(uids),
                  priority=GLib.PRIORITY_LOW)
def __init__(self, **options):
    """Claim the datastore D-Bus name, open the layout, and rebuild the
    index whenever the on-disk state is new, migrated, or unclean."""
    bus_name = dbus.service.BusName(DS_SERVICE,
                                    bus=dbus.SessionBus(),
                                    replace_existing=False,
                                    allow_replacement=False)
    dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH)
    migrated, initiated = self._open_layout()
    self._metadata_store = MetadataStore()
    self._file_store = FileStore()
    self._optimizer = Optimizer(self._file_store, self._metadata_store)
    self._index_store = IndexStore()
    self._index_updating = False
    root_path = layoutmanager.get_instance().get_root_path()
    self._cleanflag = os.path.join(root_path, 'ds_clean')
    if initiated:
        logging.debug('Initiate datastore')
        self._rebuild_index()
        self._index_store.flush()
        self._mark_clean()
        return
    if migrated:
        self._rebuild_index()
        self._mark_clean()
        return
    rebuild = False
    stat = os.statvfs(root_path)
    free_bytes = stat.f_bavail * stat.f_bsize
    # logging.warn is a deprecated alias; use warning().
    if not self._index_store.index_updated:
        logging.warning('Index is not up-to-date')
        rebuild = True
    elif not os.path.exists(self._cleanflag):
        logging.warning('DS state is not clean')
        rebuild = True
    elif free_bytes < MIN_INDEX_FREE_BYTES:
        logging.warning('Disk space tight for index')
        rebuild = True
    if rebuild:
        logging.warning('Trigger index rebuild')
        self._rebuild_index()
    else:
        # Fast path: reuse the existing on-disk index.
        try:
            self._index_store.open_index()
        except Exception:
            # Narrowed from BaseException so KeyboardInterrupt and
            # SystemExit are not swallowed by the recovery path.
            logging.exception('Failed to open index')
            self._rebuild_index()
    self._mark_clean()
    return
def __init__(self, **options):
    """Claim the datastore D-Bus name, open the layout, and rebuild the
    index whenever the on-disk state is new, migrated, or unclean."""
    bus_name = dbus.service.BusName(DS_SERVICE,
                                    bus=dbus.SessionBus(),
                                    replace_existing=False,
                                    allow_replacement=False)
    dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH)
    migrated, initiated = self._open_layout()
    self._metadata_store = MetadataStore()
    self._file_store = FileStore()
    self._optimizer = Optimizer(self._file_store, self._metadata_store)
    self._index_store = IndexStore()
    self._index_updating = False
    root_path = layoutmanager.get_instance().get_root_path()
    self._cleanflag = os.path.join(root_path, 'ds_clean')
    if initiated:
        logging.debug('Initiate datastore')
        self._rebuild_index()
        self._index_store.flush()
        self._mark_clean()
        return
    if migrated:
        self._rebuild_index()
        self._mark_clean()
        return
    rebuild = False
    stat = os.statvfs(root_path)
    free_bytes = stat.f_bavail * stat.f_bsize
    # logging.warn is a deprecated alias; use warning().
    if not self._index_store.index_updated:
        logging.warning('Index is not up-to-date')
        rebuild = True
    elif not os.path.exists(self._cleanflag):
        logging.warning('DS state is not clean')
        rebuild = True
    elif free_bytes < MIN_INDEX_FREE_BYTES:
        logging.warning('Disk space tight for index')
        rebuild = True
    if rebuild:
        logging.warning('Trigger index rebuild')
        self._rebuild_index()
    else:
        # Fast path: reuse the existing on-disk index.
        try:
            self._index_store.open_index()
        except Exception:
            # Fix: was a bare "except:", which also catches
            # KeyboardInterrupt/SystemExit.
            logging.exception('Failed to open index')
            self._rebuild_index()
    self._mark_clean()
    return
def _add_checksum_entry(self, uid, checksum):
    """Create an empty marker file named after uid in the checksum dir."""
    checksums_dir = layoutmanager.get_instance().get_checksums_dir()
    marker = os.path.join(checksums_dir, checksum, uid)
    logging.debug('touch %r', marker)
    open(marker, 'w').close()
def delete(self, uid):
    ''' Delete a journal entry '''
    # Resolve the entity name for this uid, then drop the entity.
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    self._ers.delete_entity(entity_name)
def contains(self, uid):
    ''' Check if there is a journal entry with the given UID '''
    # Resolve the entity name for this uid and ask the store about it.
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    return self._ers.contains_entity(entity_name)
def __update_index_cb(self, uids):
    """Idle callback: index one entry per invocation, repairing missing
    metadata along the way; returns True while entries remain."""
    if uids:
        uid = uids.pop()
        logging.debug('Updating entry %r in index. %d to go.',
                      uid, len(uids))
        if not self._index_store.contains(uid):
            try:
                update_metadata = False
                props = self._metadata_store.retrieve(uid)
                if 'filesize' not in props:
                    path = self._file_store.get_file_path(uid)
                    if os.path.exists(path):
                        props['filesize'] = os.stat(path).st_size
                        update_metadata = True
                if 'timestamp' not in props:
                    props['timestamp'] = str(int(time.time()))
                    update_metadata = True
                if 'creation_time' not in props:
                    # Prefer the legacy ctime string; fall back to the
                    # timestamp when it is absent or unparsable.
                    if 'ctime' in props:
                        try:
                            props['creation_time'] = time.mktime(
                                time.strptime(props['ctime'],
                                              migration.DATE_FORMAT))
                        except (TypeError, ValueError):
                            pass
                    if 'creation_time' not in props:
                        props['creation_time'] = props['timestamp']
                    update_metadata = True
                if update_metadata:
                    self._metadata_store.store(uid, props)
                self._index_store.store(uid, props)
            except Exception:
                logging.exception('Error processing %r', uid)
                logging.warn('Will attempt to delete corrupt entry %r', uid)
                try:
                    # self.delete(uid) only works on well-formed
                    # entries :-/
                    entry_path = \
                        layoutmanager.get_instance().get_entry_path(uid)
                    shutil.rmtree(entry_path)
                except Exception:
                    logging.exception('Error deleting corrupt entry %r',
                                      uid)
    if not uids:
        self._index_store.flush()
        self._index_updating = False
        logging.debug('Finished updating index.')
        return False
    return True
def store(self, uid, metadata):
    """Persist every property of an entry, pruning stale ones on update."""
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    if not os.path.exists(metadata_path):
        os.makedirs(metadata_path)
    else:
        # Existing entry: delete property files the caller no longer sends.
        received_keys = metadata.keys()
        for key in os.listdir(metadata_path):
            if key not in _INTERNAL_KEYS and key not in received_keys:
                os.remove(os.path.join(metadata_path, key))
    metadata['uid'] = uid
    for key, value in metadata.items():
        self._set_property(uid, key, value, md_path=metadata_path)
def get_property(self, uid, key):
    ''' Get a single property '''
    # Resolve the entity and look the key up in its property description.
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    entity = self._ers.get_entity(entity_name)
    # Raises KeyError when the property is absent, same as before.
    return entity.get_properties()[key]
def optimize(self, uid):
    """Add an entry to a queue of entries to be checked for duplicates."""
    if not os.path.exists(self._file_store.get_file_path(uid)):
        return
    queue_path = layoutmanager.get_instance().get_queue_path()
    marker = os.path.join(queue_path, uid)
    open(marker, 'w').close()
    logging.debug('optimize %r', marker)
    # Start draining the queue from the main loop if not already running.
    if self._enqueue_checksum_id is None:
        self._enqueue_checksum_id = GLib.idle_add(
            self._process_entry_cb, priority=GLib.PRIORITY_LOW)
def optimize(self, uid):
    """Add an entry to a queue of entries to be checked for duplicates."""
    if not os.path.exists(self._file_store.get_file_path(uid)):
        return
    queue_path = layoutmanager.get_instance().get_queue_path()
    marker = os.path.join(queue_path, uid)
    open(marker, 'w').close()
    logging.debug('optimize %r', marker)
    # Start draining the queue from the main loop if not already running.
    if self._enqueue_checksum_id is None:
        self._enqueue_checksum_id = GObject.idle_add(
            self._process_entry_cb, priority=GObject.PRIORITY_LOW)
def retrieve(self, uid, properties=None):
    ''' Retrieve some properties of a journal entry

    When *properties* is given, only those keys are returned (this
    implements the previous TODO); with properties=None the full
    description is returned, as before.
    '''
    # Name of the entry
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    # Get all the (accessible) documents describing that identifier
    entity = self._ers.get_entity(entity_name)
    # Get all the properties
    description = entity.get_properties()
    if properties:
        description = {key: value for key, value in description.items()
                       if key in properties}
    return description
def _find_all(self, query, properties):
    """Full-scan fallback: page through every entry on disk."""
    uids = layoutmanager.get_instance().find_all()
    count = len(uids)
    offset = query.get('offset', 0)
    limit = query.get('limit', MAX_QUERY_LIMIT)
    entries = []
    for uid in uids[offset:offset + limit]:
        metadata = self._metadata_store.retrieve(uid, properties)
        self._fill_internal_props(metadata, uid, properties)
        entries.append(metadata)
    return entries, count
def store(self, uid, file_path, transfer_ownership, completion_cb):
    """Store a file for a given entry.

    Moves the file when ownership is transferred (copying across
    filesystems), otherwise copies it asynchronously; calls
    completion_cb when the data is in place.
    """
    dir_path = layoutmanager.get_instance().get_entry_path(uid)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    destination_path = layoutmanager.get_instance().get_data_path(uid)
    if file_path:
        if not os.path.isfile(file_path):
            raise ValueError('No file at %r' % file_path)
        if os.path.islink(file_path):
            # Can't keep symlinks (especially pointed to removable medias).
            # Later, optimizer will help with saving duplicates
            file_path = os.path.realpath(file_path)
            # We should not move original file
            transfer_ownership = False
        if transfer_ownership:
            try:
                logging.debug('FileStore moving from %r to %r', file_path,
                              destination_path)
                os.rename(file_path, destination_path)
                completion_cb()
            except OSError as e:
                # Fix: was "except OSError, e" (Python-2-only syntax).
                if e.errno == errno.EXDEV:
                    self._async_copy(file_path, destination_path,
                                     completion_cb, unlink_src=True)
                else:
                    raise
        else:
            self._async_copy(file_path, destination_path, completion_cb,
                             unlink_src=False)
    # Fix: the original ended with an unterminated triple-quoted string
    # (syntax error); the dead TODO block is kept as comments instead.
    # TODO: How can we support deleting the file of an entry?
    # elif not file_path and os.path.exists(destination_path):
    #     logging.debug('FileStore: deleting %r' % destination_path)
    #     os.remove(destination_path)
    #     completion_cb()
    else:
        logging.debug('FileStore: Nothing to do')
        completion_cb()
def retrieve(self, uid, user_id, extension):
    """Place the file associated to a given entry into a directory where
    the user can read it. The caller is reponsible for deleting this
    file."""
    file_path = layoutmanager.get_instance().get_data_path(uid)
    if not os.path.exists(file_path):
        logger.debug('Entry %r doesnt have any file', uid)
        return ''
    # Under Rainbow isolation, hand the file to the activity's own dir.
    use_instance_dir = os.path.exists('/etc/olpc-security') and \
        os.getuid() != user_id
    if use_instance_dir:
        if not user_id:
            raise ValueError('Couldnt determine the current user uid.')
        destination_dir = os.path.join(os.environ['HOME'], 'isolation',
                                       '1', 'uid_to_instance_dir',
                                       str(user_id))
    else:
        destination_dir = env.get_profile_path('data')
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
    if extension is None:
        extension = ''
    elif extension:
        extension = '.' + extension
    # Reserve a unique name, then free it so link/symlink can create it.
    fd, destination_path = tempfile.mkstemp(prefix=uid + '_',
                                            suffix=extension,
                                            dir=destination_dir)
    os.close(fd)
    os.unlink(destination_path)
    # Try to hard link from the original file to the targetpath. This can
    # fail if the file is in a different filesystem. Do a symlink instead.
    try:
        os.link(file_path, destination_path)
    except OSError as e:
        if e.errno == errno.EXDEV:
            os.symlink(file_path, destination_path)
        else:
            raise
    return destination_path
def retrieve(self, uid, user_id, extension):
    """Place the file associated to a given entry into a directory where
    the user can read it. The caller is reponsible for deleting this
    file.

    Returns the path of the created link (empty string when the entry
    has no file).
    """
    file_path = layoutmanager.get_instance().get_data_path(uid)
    if not os.path.exists(file_path):
        logging.debug('Entry %r doesnt have any file', uid)
        return ''
    use_instance_dir = os.path.exists('/etc/olpc-security') and \
        os.getuid() != user_id
    if use_instance_dir:
        if not user_id:
            raise ValueError('Couldnt determine the current user uid.')
        destination_dir = os.path.join(os.environ['HOME'], 'isolation',
                                       '1', 'uid_to_instance_dir',
                                       str(user_id))
    else:
        profile = os.environ.get('SUGAR_PROFILE', 'default')
        destination_dir = os.path.join(os.path.expanduser('~'), '.sugar',
                                       profile, 'data')
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
    if extension is None:
        extension = ''
    elif extension:
        extension = '.' + extension
    # Reserve a unique name, then free it so link/symlink can create it.
    fd, destination_path = tempfile.mkstemp(prefix=uid + '_',
                                            suffix=extension,
                                            dir=destination_dir)
    os.close(fd)
    os.unlink(destination_path)
    # Try to hard link from the original file to the targetpath. This can
    # fail if the file is in a different filesystem. Do a symlink instead.
    try:
        os.link(file_path, destination_path)
    except OSError as e:
        # Fix: was "except OSError, e" (Python-2-only syntax).
        if e.errno == errno.EXDEV:
            os.symlink(file_path, destination_path)
        else:
            raise
    # Fix: the original fell off the end and returned None; callers
    # (see the sibling implementation) expect the destination path.
    return destination_path
def set_property(self, uid, key, value):
    ''' Set a single property '''
    # Resolve the entity for this uid.
    entity_name = layoutmanager.get_instance().get_entity_name(uid)
    entity = self._ers.get_entity(entity_name)
    # Values are stored as-is; the old Python-2 encoding hack is kept
    # here for reference:
    # if isinstance(value, unicode):
    #     value = value.encode('utf-8')
    # elif not isinstance(value, basestring):
    #     value = str(value)
    entity.set_property_value(key, value, private=True)
    # Persist
    self._ers.persist_entity(entity)
def _rebuild_index(self):
    """Remove and recreate index."""
    self._index_store.close_index()
    self._index_store.remove_index()
    # Rebuild the index in tmpfs to better handle ENOSPC.
    temp_index_path = tempfile.mkdtemp(prefix='sugar-datastore-index-')
    logger.debug('Rebuilding index in %s' % temp_index_path)
    self._index_store.open_index(temp_path=temp_index_path)
    self._update_index()
    self._index_store.close_index()
    on_disk = False
    # Can we fit the index on disk? Get its disk usage in bytes...
    index_du = subprocess.check_output(['/usr/bin/du', '-bs',
                                        temp_index_path])
    index_du = int(index_du.split(b'\t')[0])
    # ...and the bytes available on the target filesystem.
    stat = os.statvfs(temp_index_path)
    free_bytes = stat.f_bavail * stat.f_bsize
    if free_bytes > (index_du * 1.2) and free_bytes > MIN_INDEX_FREE_BYTES:
        # 1.2 due to 20% room for growth
        logger.debug('Attempting to move tempfs index to disk')
        # move to internal disk
        try:
            index_path = layoutmanager.get_instance().get_index_path()
            if os.path.exists(index_path):
                shutil.rmtree(index_path)
            shutil.copytree(temp_index_path, index_path)
            shutil.rmtree(temp_index_path)
            on_disk = True
        except Exception:
            logger.exception('Error copying tempfs index to disk,'
                             'revert to using tempfs index.')
    else:
        logger.warn("Not enough disk space, using tempfs index")
    if on_disk:
        self._index_store.open_index()
    else:
        self._index_store.open_index(temp_path=temp_index_path)
def _rebuild_index(self):
    """Remove and recreate index."""
    self._index_store.close_index()
    self._index_store.remove_index()
    # Rebuild the index in tmpfs to better handle ENOSPC.
    temp_index_path = tempfile.mkdtemp(prefix='sugar-datastore-index-')
    logger.warning('Rebuilding index in %s', temp_index_path)
    self._index_store.open_index(temp_path=temp_index_path)
    self._update_index()
    self._index_store.close_index()
    on_disk = False
    # Can we fit the index on disk? Get its disk usage in bytes.
    index_du = subprocess.check_output(['/usr/bin/du', '-bs',
                                        temp_index_path])
    # Fix: check_output() returns bytes on Python 3, so split('\t')
    # raised TypeError.  Splitting on whitespace and parsing the first
    # field works for both bytes and str (int() accepts both).
    index_du = int(index_du.split()[0])
    # Bytes available on the filesystem holding the temp index.
    stat = os.statvfs(temp_index_path)
    free_bytes = stat.f_bavail * stat.f_bsize
    if free_bytes > (index_du * 1.2) and free_bytes > MIN_INDEX_FREE_BYTES:
        # 1.2 leaves 20% room for growth.
        logger.warning('Attempting to move tempfs index to disk')
        # Move to internal disk.
        try:
            index_path = layoutmanager.get_instance().get_index_path()
            if os.path.exists(index_path):
                shutil.rmtree(index_path)
            shutil.copytree(temp_index_path, index_path)
            shutil.rmtree(temp_index_path)
            on_disk = True
        except Exception:
            # Fix: adjacent string literals were missing a space
            # ("disk,revert").
            logger.exception('Error copying tempfs index to disk, '
                             'revert to using tempfs index.')
    else:
        logger.warning("Not enough disk space, using tempfs index")
    if on_disk:
        self._index_store.open_index()
    else:
        self._index_store.open_index(temp_path=temp_index_path)
def delete(self, uid):
    """Delete entry *uid* from optimizer, index, file and metadata
    stores, then remove its directory tree and emit Deleted."""
    self._mark_dirty()
    try:
        entry_path = layoutmanager.get_instance().get_entry_path(uid)
        self._optimizer.remove(uid)
        self._index_store.delete(uid)
        self._file_store.delete(uid)
        self._metadata_store.delete(uid)
        # remove the dirtree
        shutil.rmtree(entry_path)
        try:
            # will remove the hashed dir if nothing else is there
            os.removedirs(os.path.dirname(entry_path))
        except OSError:
            # Fix: was "except BaseException: pass", which also swallowed
            # KeyboardInterrupt/SystemExit; only the expected
            # "directory not empty" style failures are ignored.
            pass
    except Exception:
        logger.exception('Exception deleting entry')
        raise
    self.Deleted(uid)
    logger.debug('deleted %s', uid)
    self._mark_clean()
def _set_property(self, uid, key, value, md_path=False): """Set a property in metadata store Value datatypes are almost entirely dbus.String, with exceptions for certain keys as follows; * "timestamp", and "creation_time" of dbus.Int32, * "preview" of dbus.ByteArray, * "filesize" of int, and * "checksum" of str. """ if not md_path: md_path = layoutmanager.get_instance().get_metadata_path(uid) # Hack to support activities that still pass properties named as # for example title:text. if ':' in key: key = key.split(':', 1)[0] changed = True fpath = os.path.join(md_path, key) tpath = os.path.join(md_path, '.' + key) if isinstance(value, int): # int or dbus.Int32 value = str(value).encode() elif isinstance(value, str): # str or dbus.String value = value.encode() # avoid pointless writes; replace atomically if os.path.exists(fpath): f = open(fpath, 'rb') stored_val = f.read() f.close() if stored_val == value: changed = False if changed: f = open(tpath, 'wb') f.write(value) f.close() os.rename(tpath, fpath)
def _process_entry_cb(self):
    """Process one item in the checksums queue by calculating its
    checksum, checking if there exist already an identical file, and in
    that case substituting its file with a hard link to that
    pre-existing file.  Returns True while items remain queued."""
    queue_path = layoutmanager.get_instance().get_queue_path()
    queue = os.listdir(queue_path)
    if queue:
        uid = queue[0]
        logging.debug('_process_entry_cb processing %r', uid)
        file_in_entry_path = self._file_store.get_file_path(uid)
        if not os.path.exists(file_in_entry_path):
            logging.info('non-existent entry in queue: %r', uid)
        else:
            checksum = self._calculate_md5sum(file_in_entry_path)
            self._metadata_store.set_property(uid, 'checksum', checksum)
            if self._identical_file_already_exists(checksum):
                if not self._already_linked(uid, checksum):
                    existing_entry_uid = \
                        self._get_uid_from_checksum(checksum)
                    self._file_store.hard_link_entry(uid,
                                                     existing_entry_uid)
                    self._add_checksum_entry(uid, checksum)
            else:
                self._create_checksum_dir(checksum)
                self._add_checksum_entry(uid, checksum)
        os.remove(os.path.join(queue_path, uid))
    if len(queue) <= 1:
        self._enqueue_checksum_id = None
        return False
    return True
def _process_entry_cb(self):
    """Drain one entry from the checksums queue: compute its checksum
    and hard-link it to an identical pre-existing file when one is
    tracked; returns True while more queued items remain."""
    queue_path = layoutmanager.get_instance().get_queue_path()
    queue = os.listdir(queue_path)
    if queue:
        uid = queue[0]
        logging.debug('_process_entry_cb processing %r', uid)
        file_in_entry_path = self._file_store.get_file_path(uid)
        if not os.path.exists(file_in_entry_path):
            logging.info('non-existent entry in queue: %r', uid)
        else:
            checksum = self._calculate_md5sum(file_in_entry_path)
            self._metadata_store.set_property(uid, 'checksum', checksum)
            if not self._identical_file_already_exists(checksum):
                # First file with this checksum: start tracking it.
                self._create_checksum_dir(checksum)
                self._add_checksum_entry(uid, checksum)
            elif not self._already_linked(uid, checksum):
                existing_entry_uid = self._get_uid_from_checksum(checksum)
                self._file_store.hard_link_entry(uid, existing_entry_uid)
                self._add_checksum_entry(uid, checksum)
        os.remove(os.path.join(queue_path, uid))
    if len(queue) <= 1:
        self._enqueue_checksum_id = None
        return False
    return True
def delete(self, uid):
    """Delete entry *uid* from optimizer, index, file and metadata
    stores, then remove its directory tree and emit Deleted."""
    self._mark_dirty()
    try:
        entry_path = layoutmanager.get_instance().get_entry_path(uid)
        self._optimizer.remove(uid)
        self._index_store.delete(uid)
        self._file_store.delete(uid)
        self._metadata_store.delete(uid)
        # remove the dirtree
        shutil.rmtree(entry_path)
        try:
            # will remove the hashed dir if nothing else is there
            os.removedirs(os.path.dirname(entry_path))
        except OSError:
            # Fix: was a bare "except: pass"; only the expected
            # "directory not empty" style failures are ignored.
            pass
    except Exception:
        # Fix: was a bare "except:"; narrowed so KeyboardInterrupt and
        # SystemExit are not intercepted (still re-raised after logging).
        logger.exception('Exception deleting entry')
        raise
    self.Deleted(uid)
    logger.debug('deleted %s', uid)
    self._mark_clean()
def remove(self, uid):
    """Remove any structures left from space optimization"""
    checksum = self._metadata_store.get_property(uid, 'checksum')
    if checksum is None:
        return
    checksums_dir = layoutmanager.get_instance().get_checksums_dir()
    checksum_path = os.path.join(checksums_dir, checksum)
    checksum_entry_path = os.path.join(checksum_path, uid)
    if os.path.exists(checksum_entry_path):
        logging.debug('remove %r', checksum_entry_path)
        os.remove(checksum_entry_path)
    if os.path.exists(checksum_path):
        try:
            # Drop the checksum dir once no entries reference it.
            os.rmdir(checksum_path)
            logging.debug('removed %r', checksum_path)
        except OSError as e:
            if e.errno != errno.ENOTEMPTY:
                raise
def _open_layout(self):
    """Open layout manager, check version of data store on disk and
    migrate if necessary.

    Returns a pair of booleans. For the first, True if migration was
    done and an index rebuild is required. For the second, True if
    datastore was just initiated.
    """
    layout_manager = layoutmanager.get_instance()
    # Brand new datastore: just stamp it with the current version.
    if layout_manager.is_empty():
        layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION)
        return False, True
    old_version = layout_manager.get_version()
    if old_version == layoutmanager.CURRENT_LAYOUT_VERSION:
        return False, False
    if old_version == 0:
        migration.migrate_from_0()
    layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION)
    return True, False
def retrieve(self, uid, properties=None):
    """Retrieve metadata for an object from the store.

    Values are read as dbus.ByteArray, then converted to expected types.
    """
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    if properties is not None:
        # The reader expects byte keys; encode any str it was given.
        properties = [p.encode('utf-8') if isinstance(p, str) else p
                      for p in properties]
    metadata = metadatareader.retrieve(metadata_path, properties)
    # Convert from dbus.ByteArray to the types callers expect.
    for key, value in metadata.items():
        if key in ('filesize', 'creation_time', 'timestamp'):
            metadata[key] = dbus.Int32(value)
        elif key == 'checksum':
            metadata[key] = value.decode()
        elif key != 'preview':
            metadata[key] = dbus.String(value.decode())
    return metadata
def _migrate_file(root_path, old_root_path, uid):
    """Move an entry's data file from the flat store to its new path."""
    old_file_path = os.path.join(old_root_path, uid)
    if os.path.exists(old_file_path):
        new_data_path = layoutmanager.get_instance().get_data_path(uid)
        os.rename(old_file_path, new_data_path)
def _migrate_preview(root_path, old_root_path, uid):
    """Move an entry's preview file into its new metadata directory.

    Entries without a preview are skipped; previously the unconditional
    os.rename() raised for them and the caller logged a spurious
    migration error.
    """
    metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
    old_preview_path = os.path.join(old_root_path, 'preview', uid)
    if os.path.exists(old_preview_path):
        os.rename(old_preview_path,
                  os.path.join(metadata_path, 'preview'))