def DeleteFiles(self, file_service_id, tag_service_id, hash_ids,
                hash_id_table_name):

    (cache_display_current_mappings_table_name,
     cache_display_pending_mappings_table_name
     ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
         file_service_id, tag_service_id)

    # temp hashes to mappings
    current_mapping_ids_raw = self._Execute(
        'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            hash_id_table_name,
            cache_display_current_mappings_table_name)).fetchall()

    current_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
        current_mapping_ids_raw)

    # temp hashes to mappings
    pending_mapping_ids_raw = self._Execute(
        'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            hash_id_table_name,
            cache_display_pending_mappings_table_name)).fetchall()

    pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
        pending_mapping_ids_raw)

    all_ids_seen = set(current_mapping_ids_dict.keys())
    all_ids_seen.update(pending_mapping_ids_dict.keys())

    counts_cache_changes = []

    for tag_id in all_ids_seen:

        current_hash_ids = current_mapping_ids_dict[tag_id]

        num_current = len(current_hash_ids)

        #

        pending_hash_ids = pending_mapping_ids_dict[tag_id]

        num_pending = len(pending_hash_ids)

        counts_cache_changes.append((tag_id, num_current, num_pending))

    self._ExecuteMany(
        'DELETE FROM ' + cache_display_current_mappings_table_name +
        ' WHERE hash_id = ?;', ((hash_id, ) for hash_id in hash_ids))
    self._ExecuteMany(
        'DELETE FROM ' + cache_display_pending_mappings_table_name +
        ' WHERE hash_id = ?;', ((hash_id, ) for hash_id in hash_ids))

    if len(counts_cache_changes) > 0:

        self.modules_mappings_counts_update.ReduceCounts(
            ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
            counts_cache_changes)
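# --- illustrative sketch, not part of the original module ---
# DeleteFiles above leans on HydrusData.BuildKeyToSetDict to group the fetched
# ( tag_id, hash_id ) rows by tag_id. The assumed semantics, reproduced here
# with the standard library only, are roughly:

import collections


def build_key_to_set_dict_sketch(pairs):

    # group ( key, value ) pairs into key -> { values }
    # a defaultdict means a missing key yields an empty set rather than a KeyError
    d = collections.defaultdict(set)

    for (key, value) in pairs:

        d[key].add(value)

    return d


# e.g. build_key_to_set_dict_sketch( [ ( 1, 10 ), ( 1, 11 ), ( 2, 10 ) ] )
# -> { 1: { 10, 11 }, 2: { 10 } }, and indexing a missing tag gives an empty
# set, which is why the loop above can read pending_mapping_ids_dict[ tag_id ]
# even when a tag only appears in the current dict.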
def GetTagsToDescendants(self, display_type: int, tag_service_id: int,
                         ideal_tag_ids: typing.Collection[int]):

    if len(ideal_tag_ids) == 0:

        return {}

    elif len(ideal_tag_ids) == 1:

        (ideal_tag_id, ) = ideal_tag_ids

        descendants = self.GetDescendants(display_type, tag_service_id,
                                          ideal_tag_id)

        return {ideal_tag_id: descendants}

    cache_tag_parents_lookup_table_name = GenerateTagParentsLookupCacheTableName(
        display_type, tag_service_id)

    with self._MakeTemporaryIntegerTable(ideal_tag_ids,
                                         'ancestor_tag_id') as temp_table_name:

        tag_ids_to_descendants = HydrusData.BuildKeyToSetDict(
            self._Execute(
                'SELECT ancestor_tag_id, child_tag_id FROM {} CROSS JOIN {} USING ( ancestor_tag_id );'
                .format(temp_table_name,
                        cache_tag_parents_lookup_table_name)))

    for ideal_tag_id in ideal_tag_ids:

        if ideal_tag_id not in tag_ids_to_descendants:

            tag_ids_to_descendants[ideal_tag_id] = set()

    return tag_ids_to_descendants
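# --- illustrative sketch, not part of the original module ---
# GetTagsToDescendants uses the _MakeTemporaryIntegerTable pattern, which is
# assumed to stage a collection of ids in a one-column temp table so a large id
# set can drive an indexed join rather than a huge IN ( ... ) clause. A minimal
# standalone version with raw sqlite3 (hypothetical table names) might look like:

import sqlite3


def fetch_pairs_via_temp_table(db: sqlite3.Connection, ancestor_tag_ids,
                               lookup_table_name):

    # stage the ids, join against the lookup table, then clean up
    db.execute(
        'CREATE TEMPORARY TABLE temp_ancestor_ids ( ancestor_tag_id INTEGER PRIMARY KEY );'
    )
    db.executemany(
        'INSERT OR IGNORE INTO temp_ancestor_ids ( ancestor_tag_id ) VALUES ( ? );',
        ((ancestor_tag_id, ) for ancestor_tag_id in ancestor_tag_ids))

    try:

        # CROSS JOIN pins the join order in SQLite: temp ids first, lookup second
        return db.execute(
            'SELECT ancestor_tag_id, child_tag_id FROM temp_ancestor_ids CROSS JOIN {} USING ( ancestor_tag_id );'
            .format(lookup_table_name)).fetchall()

    finally:

        db.execute('DROP TABLE temp_ancestor_ids;')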
def GetHashIdsToURLs(self, hash_ids_table_name=None):

    hash_ids_to_urls = {}

    if hash_ids_table_name is not None:

        hash_ids_to_urls = HydrusData.BuildKeyToSetDict(
            self._Execute(
                'SELECT hash_id, url FROM {} CROSS JOIN url_map USING ( hash_id ) CROSS JOIN urls USING ( url_id );'
                .format(hash_ids_table_name)))

    return hash_ids_to_urls
def GetIdealsToChains(self, display_type, tag_service_id, ideal_tag_ids):

    # this only takes ideal_tag_ids

    if len(ideal_tag_ids) == 0:

        return {}

    elif len(ideal_tag_ids) == 1:

        (ideal_tag_id, ) = ideal_tag_ids

        chain_tag_ids = self.GetChainMembersFromIdeal(
            display_type, tag_service_id, ideal_tag_id)

        return {ideal_tag_id: chain_tag_ids}

    cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName(
        display_type, tag_service_id)

    with self._MakeTemporaryIntegerTable(ideal_tag_ids,
                                         'ideal_tag_id') as temp_table_name:

        # temp tags to lookup
        ideal_tag_ids_to_chain_members = HydrusData.BuildKeyToSetDict(
            self._Execute(
                'SELECT ideal_tag_id, bad_tag_id FROM {} CROSS JOIN {} USING ( ideal_tag_id );'
                .format(temp_table_name,
                        cache_tag_siblings_lookup_table_name)))

    # this returns ideal in the chain, and chains of size 1

    for ideal_tag_id in ideal_tag_ids:

        ideal_tag_ids_to_chain_members[ideal_tag_id].add(ideal_tag_id)

    return ideal_tag_ids_to_chain_members
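# --- illustrative usage note, hypothetical ids ---
# because the loop above adds each ideal to its own set, every ideal appears in
# its own chain, including "chains" of size 1 with no siblings at all, e.g.:
#
#   ideals_to_chains = self.GetIdealsToChains( display_type, tag_service_id, { 4, 9 } )
#   # -> { 4: { 4, 12, 15 }, 9: { 9 } }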
def AddFiles(self, file_service_id, tag_service_id, hash_ids,
             hash_ids_table_name):

    (cache_display_current_mappings_table_name,
     cache_display_pending_mappings_table_name
     ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
         file_service_id, tag_service_id)

    (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
     cache_pending_mappings_table_name
     ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
         file_service_id, tag_service_id)

    # temp hashes to mappings
    storage_current_mapping_ids_raw = self._Execute(
        'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            hash_ids_table_name,
            cache_current_mappings_table_name)).fetchall()

    storage_current_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
        storage_current_mapping_ids_raw)

    # temp hashes to mappings
    storage_pending_mapping_ids_raw = self._Execute(
        'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            hash_ids_table_name,
            cache_pending_mappings_table_name)).fetchall()

    storage_pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
        storage_pending_mapping_ids_raw)

    all_storage_tag_ids = set(storage_current_mapping_ids_dict.keys())
    all_storage_tag_ids.update(storage_pending_mapping_ids_dict.keys())

    storage_tag_ids_to_implies_tag_ids = self.modules_tag_display.GetTagsToImplies(
        ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_storage_tag_ids)

    display_tag_ids_to_implied_by_tag_ids = collections.defaultdict(set)

    for (storage_tag_id,
         implies_tag_ids) in storage_tag_ids_to_implies_tag_ids.items():

        for implies_tag_id in implies_tag_ids:

            display_tag_ids_to_implied_by_tag_ids[implies_tag_id].add(
                storage_tag_id)

    counts_cache_changes = []

    # for all display tags implied by the existing storage mappings, add them
    # btw, when we add files to a specific domain, we know that all inserts are new

    for (display_tag_id, implied_by_tag_ids
         ) in display_tag_ids_to_implied_by_tag_ids.items():

        display_current_hash_ids = set(
            itertools.chain.from_iterable(
                (storage_current_mapping_ids_dict[implied_by_tag_id]
                 for implied_by_tag_id in implied_by_tag_ids)))

        current_delta = len(display_current_hash_ids)

        if current_delta > 0:

            self._ExecuteMany(
                'INSERT OR IGNORE INTO ' +
                cache_display_current_mappings_table_name +
                ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                ((hash_id, display_tag_id)
                 for hash_id in display_current_hash_ids))

        #

        display_pending_hash_ids = set(
            itertools.chain.from_iterable(
                (storage_pending_mapping_ids_dict[implied_by_tag_id]
                 for implied_by_tag_id in implied_by_tag_ids)))

        pending_delta = len(display_pending_hash_ids)

        if pending_delta > 0:

            self._ExecuteMany(
                'INSERT OR IGNORE INTO ' +
                cache_display_pending_mappings_table_name +
                ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                ((hash_id, display_tag_id)
                 for hash_id in display_pending_hash_ids))

        #

        if current_delta > 0 or pending_delta > 0:

            counts_cache_changes.append(
                (display_tag_id, current_delta, pending_delta))

    if len(counts_cache_changes) > 0:

        self.modules_mappings_counts_update.AddCounts(
            ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
            counts_cache_changes)
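# --- illustrative sketch, not part of the original module ---
# AddFiles above inverts the storage_tag_id -> implies_tag_ids map so that each
# display tag knows every storage tag that implies it, then unions those storage
# tags' hash_ids into the display rows. The inversion step in miniature
# (hypothetical ids):

import collections


def invert_implications_sketch(storage_tag_ids_to_implies_tag_ids):

    display_tag_ids_to_implied_by_tag_ids = collections.defaultdict(set)

    for (storage_tag_id,
         implies_tag_ids) in storage_tag_ids_to_implies_tag_ids.items():

        for implies_tag_id in implies_tag_ids:

            display_tag_ids_to_implied_by_tag_ids[implies_tag_id].add(
                storage_tag_id)

    return display_tag_ids_to_implied_by_tag_ids


# e.g. if storage tag 1 implies itself and its parent 7:
# invert_implications_sketch( { 1: { 1, 7 } } ) -> { 1: { 1 }, 7: { 1 } }
# the display rows for tag 7 then receive the union of the current/pending
# hash_ids of every storage tag in its implied-by set.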
def AddFiles( self, file_service_id, tag_service_id, hash_ids, hash_ids_table_name ):
    
    ( cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames( file_service_id, tag_service_id )
    
    ( current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames( tag_service_id )
    
    # deleted don't have a/c counts to update, so we can do it all in one go here
    self._Execute( 'INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( cache_deleted_mappings_table_name, hash_ids_table_name, deleted_mappings_table_name ) )
    
    # temp hashes to mappings
    current_mapping_ids_raw = self._Execute( 'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( hash_ids_table_name, current_mappings_table_name ) ).fetchall()
    
    current_mapping_ids_dict = HydrusData.BuildKeyToSetDict( current_mapping_ids_raw )
    
    # temp hashes to mappings
    pending_mapping_ids_raw = self._Execute( 'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( hash_ids_table_name, pending_mappings_table_name ) ).fetchall()
    
    pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict( pending_mapping_ids_raw )
    
    all_ids_seen = set( current_mapping_ids_dict.keys() )
    all_ids_seen.update( pending_mapping_ids_dict.keys() )
    
    counts_cache_changes = []
    
    for tag_id in all_ids_seen:
        
        current_hash_ids = current_mapping_ids_dict[ tag_id ]
        
        current_delta = len( current_hash_ids )
        
        if current_delta > 0:
            
            self._ExecuteMany( 'INSERT OR IGNORE INTO ' + cache_current_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ( ( hash_id, tag_id ) for hash_id in current_hash_ids ) )
            
            # INSERT OR IGNORE can skip rows that already exist, so take the true delta from the row count
            current_delta = self._GetRowCount()
            
        
        #
        
        pending_hash_ids = pending_mapping_ids_dict[ tag_id ]
        
        pending_delta = len( pending_hash_ids )
        
        if pending_delta > 0:
            
            self._ExecuteMany( 'INSERT OR IGNORE INTO ' + cache_pending_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ( ( hash_id, tag_id ) for hash_id in pending_hash_ids ) )
            
            pending_delta = self._GetRowCount()
            
        
        #
        
        if current_delta > 0 or pending_delta > 0:
            
            counts_cache_changes.append( ( tag_id, current_delta, pending_delta ) )
            
        
    
    if len( counts_cache_changes ) > 0:
        
        self.modules_mappings_counts_update.AddCounts( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes )
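# --- illustrative sketch, not part of the original module ---
# the _GetRowCount() re-read above is assumed to report how many rows the
# preceding INSERT OR IGNORE actually wrote, so pre-existing rows do not inflate
# the a/c count deltas. sqlite3's DB-API cursors behave the same way
# (hypothetical table):

import sqlite3


def count_new_rows_sketch(db: sqlite3.Connection, rows):

    db.execute(
        'CREATE TABLE IF NOT EXISTS mappings_sketch ( hash_id INTEGER, tag_id INTEGER, PRIMARY KEY ( hash_id, tag_id ) );'
    )

    cursor = db.executemany(
        'INSERT OR IGNORE INTO mappings_sketch ( hash_id, tag_id ) VALUES ( ?, ? );',
        rows)

    # ignored duplicates are not counted as changes
    return cursor.rowcount


# e.g. inserting [ ( 1, 2 ), ( 1, 2 ), ( 3, 4 ) ] into an empty table returns 2.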
def GetRepositoryUpdateHashesICanProcess(self, service_key: bytes,
                                         content_types_to_process):

    # it is important that we use lists and sort by update index!
    # otherwise add/delete actions can occur in the wrong order

    service_id = self.modules_services.GetServiceId(service_key)

    (repository_updates_table_name,
     repository_unregistered_updates_table_name,
     repository_updates_processed_table_name
     ) = GenerateRepositoryUpdatesTableNames(service_id)

    result = self._Execute(
        'SELECT 1 FROM {} WHERE content_type = ? AND processed = ?;'.format(
            repository_updates_processed_table_name),
        (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()

    this_is_first_definitions_work = result is None

    result = self._Execute(
        'SELECT 1 FROM {} WHERE content_type != ? AND processed = ?;'.format(
            repository_updates_processed_table_name),
        (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()

    this_is_first_content_work = result is None

    min_unregistered_update_index = None

    result = self._Execute(
        'SELECT MIN( update_index ) FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            repository_unregistered_updates_table_name,
            repository_updates_table_name)).fetchone()

    if result is not None:

        (min_unregistered_update_index, ) = result

    predicate_phrase = 'processed = ? AND content_type IN {}'.format(
        HydrusData.SplayListForDB(content_types_to_process))

    if min_unregistered_update_index is not None:

        # can't process an update if any of its files are as yet unregistered (these are both unprocessed and unavailable)
        # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!

        predicate_phrase = '{} AND update_index < {}'.format(
            predicate_phrase, min_unregistered_update_index)

    query = 'SELECT update_index, hash_id, content_type FROM {} CROSS JOIN {} USING ( hash_id ) WHERE {};'.format(
        repository_updates_processed_table_name,
        repository_updates_table_name, predicate_phrase)

    rows = self._Execute(query, (False, )).fetchall()

    update_indices_to_unprocessed_hash_ids = HydrusData.BuildKeyToSetDict(
        ((update_index, hash_id)
         for (update_index, hash_id, content_type) in rows))
    hash_ids_to_content_types_to_process = HydrusData.BuildKeyToSetDict(
        ((hash_id, content_type)
         for (update_index, hash_id, content_type) in rows))

    all_hash_ids = set(hash_ids_to_content_types_to_process.keys())

    all_local_hash_ids = self.modules_files_storage.FilterHashIdsToStatus(
        self.modules_services.local_update_service_id, all_hash_ids,
        HC.CONTENT_STATUS_CURRENT)

    for sorted_update_index in sorted(
            update_indices_to_unprocessed_hash_ids.keys()):

        unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[
            sorted_update_index]

        if not unprocessed_hash_ids.issubset(all_local_hash_ids):

            # can't process an update if any of its unprocessed files are not local
            # normally they'll always be available if registered, but just in case a user deletes one manually etc...
            # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!

            update_indices_to_unprocessed_hash_ids = {
                update_index: unprocessed_hash_ids
                for (update_index, unprocessed_hash_ids
                     ) in update_indices_to_unprocessed_hash_ids.items()
                if update_index < sorted_update_index
            }

            break

    # all the hashes are now good to go

    all_hash_ids = set(
        itertools.chain.from_iterable(
            update_indices_to_unprocessed_hash_ids.values()))

    hash_ids_to_hashes = self.modules_hashes_local_cache.GetHashIdsToHashes(
        hash_ids=all_hash_ids)

    definition_hashes_and_content_types = []
    content_hashes_and_content_types = []

    if len(update_indices_to_unprocessed_hash_ids) > 0:

        for update_index in sorted(
                update_indices_to_unprocessed_hash_ids.keys()):

            unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[
                update_index]

            definition_hash_ids = {
                hash_id
                for hash_id in unprocessed_hash_ids
                if HC.CONTENT_TYPE_DEFINITIONS in
                hash_ids_to_content_types_to_process[hash_id]
            }
            content_hash_ids = {
                hash_id
                for hash_id in unprocessed_hash_ids
                if hash_id not in definition_hash_ids
            }

            for (hash_ids, hashes_and_content_types) in [
                (definition_hash_ids, definition_hashes_and_content_types),
                (content_hash_ids, content_hashes_and_content_types)
            ]:

                hashes_and_content_types.extend(
                    ((hash_ids_to_hashes[hash_id],
                      hash_ids_to_content_types_to_process[hash_id])
                     for hash_id in hash_ids))

    return (this_is_first_definitions_work,
            definition_hashes_and_content_types, this_is_first_content_work,
            content_hashes_and_content_types)
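# --- illustrative sketch, not part of the original module ---
# the gating logic above must not skip an update index: as soon as one index has
# a non-local file, that index and every later one are withheld. In miniature
# (hypothetical numbers):


def gate_by_first_blocked_index_sketch(update_indices_to_hash_ids,
                                       local_hash_ids):

    good = {}

    for update_index in sorted(update_indices_to_hash_ids.keys()):

        hash_ids = update_indices_to_hash_ids[update_index]

        if not hash_ids.issubset(local_hash_ids):

            # this index is blocked, so everything after it is withheld too
            break

        good[update_index] = hash_ids

    return good


# e.g. gate_by_first_blocked_index_sketch( { 0: { 1 }, 1: { 2 }, 2: { 3 } }, { 1, 3 } )
# -> { 0: { 1 } }; index 1 is blocked, so index 2 is withheld even though its
# file is local.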