Example #1
0
    def Generate(self,
                 file_service_id,
                 tag_service_id,
                 populate_from_storage=True,
                 status_hook=None):
        """Create the specific display mappings cache for this
        (file_service_id, tag_service_id) pair.

        Tables are created first, optionally seeded from the specific storage
        mappings cache, then the actual-domain count tables and the indices
        are generated. status_hook, if given, receives short progress strings.
        """

        table_dict = self._GetServiceTableGenerationDictSingle(
            file_service_id, tag_service_id)

        # create every cache table for this pair
        for (table_name, (create_query_without_name,
                          version_added)) in table_dict.items():

            self._Execute(create_query_without_name.format(table_name))

        if populate_from_storage:

            if status_hook is not None:

                status_hook('copying storage')

            (cache_current_mappings_table_name,
             cache_deleted_mappings_table_name,
             cache_pending_mappings_table_name
             ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
                 file_service_id, tag_service_id)

            (cache_display_current_mappings_table_name,
             cache_display_pending_mappings_table_name
             ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
                 file_service_id, tag_service_id)

            # the display cache starts out as a straight copy of storage
            copy_pairs = [(cache_display_current_mappings_table_name,
                           cache_current_mappings_table_name),
                          (cache_display_pending_mappings_table_name,
                           cache_pending_mappings_table_name)]

            for (destination_table_name, source_table_name) in copy_pairs:

                self._Execute(
                    'INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {};'
                    .format(destination_table_name, source_table_name))

        self.modules_mappings_counts.CreateTables(
            ClientTags.TAG_DISPLAY_ACTUAL,
            file_service_id,
            tag_service_id,
            populate_from_storage=populate_from_storage)

        if status_hook is not None:

            status_hook('optimising data')

        index_dict = self._GetServiceIndexGenerationDictSingle(
            file_service_id, tag_service_id)

        # indices go on after the bulk copy so the inserts stay fast
        for (table_name, columns, unique,
             version_added) in self._FlattenIndexGenerationDict(index_dict):

            self._CreateIndex(table_name, columns, unique=unique)
Example #2
0
    def DeleteFiles(self, file_service_id, tag_service_id, hash_ids,
                    hash_id_table_name):
        """Remove the given hash_ids from this pair's display mappings cache.

        hash_id_table_name is an existing (temp) table holding the hash_ids,
        used to join against the display tables. Per-tag count deltas for the
        removed rows are subtracted from the actual-domain counts afterwards.
        """

        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        # temp hashes to mappings
        current_mapping_ids_raw = self._Execute(
            'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.
            format(hash_id_table_name,
                   cache_display_current_mappings_table_name)).fetchall()

        current_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
            current_mapping_ids_raw)

        # temp hashes to mappings
        pending_mapping_ids_raw = self._Execute(
            'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.
            format(hash_id_table_name,
                   cache_display_pending_mappings_table_name)).fetchall()

        pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
            pending_mapping_ids_raw)

        all_ids_seen = set(current_mapping_ids_dict.keys())
        all_ids_seen.update(pending_mapping_ids_dict.keys())

        counts_cache_changes = []

        for tag_id in all_ids_seen:

            # NOTE(review): direct indexing assumes BuildKeyToSetDict returns
            # a defaultdict -- a tag seen only on the pending side would
            # otherwise KeyError here. Confirm against HydrusData.
            current_hash_ids = current_mapping_ids_dict[tag_id]

            num_current = len(current_hash_ids)

            #

            pending_hash_ids = pending_mapping_ids_dict[tag_id]

            num_pending = len(pending_hash_ids)

            counts_cache_changes.append((tag_id, num_current, num_pending))

        self._ExecuteMany(
            'DELETE FROM ' + cache_display_current_mappings_table_name +
            ' WHERE hash_id = ?;', ((hash_id, ) for hash_id in hash_ids))
        self._ExecuteMany(
            'DELETE FROM ' + cache_display_pending_mappings_table_name +
            ' WHERE hash_id = ?;', ((hash_id, ) for hash_id in hash_ids))

        if len(counts_cache_changes) > 0:

            self.modules_mappings_counts_update.ReduceCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
    def RegeneratePending(self, tag_service_id, status_hook=None):
        """Rebuild pending storage counts for the combined-file domain.

        Recounts distinct pending hashes per tag straight from the storage
        pending mappings table, then delegates to the combined-files display
        cache to regenerate its own pending data.
        """

        (current_mappings_table_name, deleted_mappings_table_name,
         pending_mappings_table_name, petitioned_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)

        if status_hook is not None:

            message = 'clearing old combined display data'

            status_hook(message)

        all_pending_storage_tag_ids = self._STS(
            self._Execute('SELECT DISTINCT tag_id FROM {};'.format(
                pending_mappings_table_name)))

        # keep_current=True: only the pending side is being regenerated here
        self.modules_mappings_counts.ClearCounts(
            ClientTags.TAG_DISPLAY_STORAGE,
            self.modules_services.combined_file_service_id,
            tag_service_id,
            keep_current=True)

        counts_cache_changes = []

        num_to_do = len(all_pending_storage_tag_ids)

        for (i, storage_tag_id) in enumerate(all_pending_storage_tag_ids):

            # only ping the UI every 100 tags to keep hook overhead down
            if i % 100 == 0 and status_hook is not None:

                message = 'regenerating pending tags {}'.format(
                    HydrusData.ConvertValueRangeToPrettyString(
                        i + 1, num_to_do))

                status_hook(message)

            (pending_delta, ) = self._Execute(
                'SELECT COUNT( DISTINCT hash_id ) FROM {} WHERE tag_id = ?;'.
                format(pending_mappings_table_name),
                (storage_tag_id, )).fetchone()

            counts_cache_changes.append((storage_tag_id, 0, pending_delta))

        self.modules_mappings_counts_update.AddCounts(
            ClientTags.TAG_DISPLAY_STORAGE,
            self.modules_services.combined_file_service_id, tag_service_id,
            counts_cache_changes)

        self.modules_mappings_cache_combined_files_display.RegeneratePending(
            tag_service_id, status_hook=status_hook)
Example #4
0
    def Drop(self, file_service_id, tag_service_id):
        """Drop this pair's display mappings cache tables and their counts."""

        (cache_display_current_mappings_table_name,
         cache_display_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
             file_service_id, tag_service_id)

        for table_name in (cache_display_current_mappings_table_name,
                           cache_display_pending_mappings_table_name):

            self._Execute('DROP TABLE IF EXISTS {};'.format(table_name))

        self.modules_mappings_counts.DropTables(ClientTags.TAG_DISPLAY_ACTUAL,
                                                file_service_id,
                                                tag_service_id)
Example #5
0
    def Clear(self, file_service_id, tag_service_id, keep_pending=False):
        """Empty this pair's display mappings cache.

        When keep_pending is True the pending table (and the pending side of
        the counts) is left alone; only current rows are cleared.
        """

        (cache_display_current_mappings_table_name,
         cache_display_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
             file_service_id, tag_service_id)

        self._Execute('DELETE FROM {};'.format(
            cache_display_current_mappings_table_name))

        if not keep_pending:

            self._Execute('DELETE FROM {};'.format(
                cache_display_pending_mappings_table_name))

        self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_ACTUAL,
                                                 file_service_id,
                                                 tag_service_id,
                                                 keep_pending=keep_pending)
Example #6
0
    def _GetServiceTableGenerationDictSingle(self, file_service_id,
                                             tag_service_id):
        """Return { table_name : ( create_query_template, version_added ) }
        for this pair's display cache tables."""

        (cache_display_current_mappings_table_name,
         cache_display_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
             file_service_id, tag_service_id)

        # the combined-local-media cache arrived later than the others
        if file_service_id == self.modules_services.combined_local_media_service_id:

            version = 486

        else:

            version = 400

        create_query = 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER, tag_id INTEGER, PRIMARY KEY ( hash_id, tag_id ) ) WITHOUT ROWID;'

        return {
            cache_display_current_mappings_table_name: (create_query, version),
            cache_display_pending_mappings_table_name: (create_query, version)
        }
Example #7
0
    def _GetServiceIndexGenerationDictSingle(self, file_service_id,
                                             tag_service_id):
        """Return { table_name : [ ( columns, unique, version_added ) ] }
        for this pair's display cache indices."""

        (cache_display_current_mappings_table_name,
         cache_display_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
             file_service_id, tag_service_id)

        # the combined-local-media cache arrived later than the others
        if file_service_id == self.modules_services.combined_local_media_service_id:

            version = 486

        else:

            version = 400

        # two distinct list objects on purpose: callers may mutate their copy
        return {
            cache_display_current_mappings_table_name:
            [(['tag_id', 'hash_id'], True, version)],
            cache_display_pending_mappings_table_name:
            [(['tag_id', 'hash_id'], True, version)]
        }
Example #8
0
    def AddMappings(self, file_service_id, tag_service_id, tag_id, hash_ids):
        """Add current display mappings for every tag implied by tag_id on
        the given hashes, then bump the actual-domain counts."""

        # this guy doesn't do rescind pend because of storage calculation issues that need that to occur before deletes to storage tables

        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        display_tag_ids = self.modules_tag_display.GetImplies(
            ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, tag_id)

        ac_counts = collections.Counter()

        insert_query = ('INSERT OR IGNORE INTO ' +
                        cache_display_current_mappings_table_name +
                        ' ( hash_id, tag_id ) VALUES ( ?, ? );')

        for display_tag_id in display_tag_ids:

            self._ExecuteMany(insert_query,
                              ((hash_id, display_tag_id)
                               for hash_id in hash_ids))

            num_added = self._GetRowCount()

            if num_added > 0:

                ac_counts[display_tag_id] += num_added

        if len(ac_counts) > 0:

            counts_cache_changes = [
                (display_tag_id, current_delta, 0)
                for (display_tag_id, current_delta) in ac_counts.items()
            ]

            self.modules_mappings_counts_update.AddCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Example #9
0
    def PendMappings(self, file_service_id, tag_service_id, tag_id, hash_ids):
        """Pend display mappings for every tag implied by tag_id on the given
        hashes, then bump the actual-domain pending counts."""

        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        ac_counts = collections.Counter()

        display_tag_ids = self.modules_tag_display.GetImplies(
            ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, tag_id)

        insert_query = ('INSERT OR IGNORE INTO ' +
                        cache_display_pending_mappings_table_name +
                        ' ( hash_id, tag_id ) VALUES ( ?, ? );')

        for display_tag_id in display_tag_ids:

            self._ExecuteMany(insert_query,
                              ((hash_id, display_tag_id)
                               for hash_id in hash_ids))

            num_added = self._GetRowCount()

            if num_added > 0:

                ac_counts[display_tag_id] += num_added

        if len(ac_counts) > 0:

            counts_cache_changes = [
                (display_tag_id, 0, pending_delta)
                for (display_tag_id, pending_delta) in ac_counts.items()
            ]

            self.modules_mappings_counts_update.AddCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Example #10
0
    def AddImplications(self,
                        file_service_id,
                        tag_service_id,
                        implication_tag_ids,
                        tag_id,
                        status_hook=None):
        """Propagate new implications into this pair's display mappings cache.

        For every file that has a storage mapping (current or pending) for
        any of implication_tag_ids, a display mapping for tag_id is inserted,
        and the resulting deltas are added to the actual-domain counts.
        status_hook is accepted for interface parity but is not used here.
        """

        if len(implication_tag_ids) == 0:

            return

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
         cache_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
             file_service_id, tag_service_id)
        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        statuses_to_count_delta = collections.Counter()

        (
            current_implication_tag_ids, current_implication_tag_ids_weight,
            pending_implication_tag_ids, pending_implication_tag_ids_weight
        ) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(
            ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
            implication_tag_ids)

        # one job per content status: same insert, different table pair
        jobs = []

        jobs.append(
            (HC.CONTENT_STATUS_CURRENT,
             cache_display_current_mappings_table_name,
             cache_current_mappings_table_name, current_implication_tag_ids,
             current_implication_tag_ids_weight))
        jobs.append(
            (HC.CONTENT_STATUS_PENDING,
             cache_display_pending_mappings_table_name,
             cache_pending_mappings_table_name, pending_implication_tag_ids,
             pending_implication_tag_ids_weight))

        for (status, cache_display_mappings_table_name,
             cache_mappings_table_name, add_tag_ids,
             add_tag_ids_weight) in jobs:

            if add_tag_ids_weight == 0:

                # nothing to actually add, so nbd

                continue

            if len(add_tag_ids) == 1:

                # single implied tag: a plain WHERE beats a temp-table join

                (add_tag_id, ) = add_tag_ids

                self._Execute(
                    'INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, ? FROM {} WHERE tag_id = ?;'
                    .format(cache_display_mappings_table_name,
                            cache_mappings_table_name), (tag_id, add_tag_id))

                statuses_to_count_delta[status] = self._GetRowCount()

            else:

                with self._MakeTemporaryIntegerTable(
                        add_tag_ids, 'tag_id') as temp_tag_ids_table_name:

                    # for all new implications, get files with those tags and not existing

                    self._Execute(
                        'INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, ? FROM {} CROSS JOIN {} USING ( tag_id );'
                        .format(cache_display_mappings_table_name,
                                temp_tag_ids_table_name,
                                cache_mappings_table_name), (tag_id, ))

                    statuses_to_count_delta[status] = self._GetRowCount()

        current_delta = statuses_to_count_delta[HC.CONTENT_STATUS_CURRENT]
        pending_delta = statuses_to_count_delta[HC.CONTENT_STATUS_PENDING]

        if current_delta > 0 or pending_delta > 0:

            counts_cache_changes = ((tag_id, current_delta, pending_delta), )

            self.modules_mappings_counts_update.AddCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Example #11
0
    def DeleteImplications(self,
                           file_service_id,
                           tag_service_id,
                           implication_tag_ids,
                           tag_id,
                           status_hook=None):
        """Withdraw implications from this pair's display mappings cache.

        Deletes display rows for tag_id on files whose only storage support
        was one of implication_tag_ids, keeping rows still justified by a
        remaining implication. The resulting deltas are subtracted from the
        actual-domain counts. status_hook is accepted for interface parity
        but is not used here.
        """

        if len(implication_tag_ids) == 0:

            return

        statuses_to_count_delta = collections.Counter()

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
         cache_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
             file_service_id, tag_service_id)
        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        # implications for tag_id that survive this delete
        remaining_implication_tag_ids = set(
            self.modules_tag_display.GetImpliedBy(
                ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id,
                tag_id)).difference(implication_tag_ids)

        (
            current_implication_tag_ids, current_implication_tag_ids_weight,
            pending_implication_tag_ids, pending_implication_tag_ids_weight
        ) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(
            ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
            implication_tag_ids)
        (
            current_remaining_implication_tag_ids,
            current_remaining_implication_tag_ids_weight,
            pending_remaining_implication_tag_ids,
            pending_remaining_implication_tag_ids_weight
        ) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(
            ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
            remaining_implication_tag_ids)

        # one job per content status: (display table, storage table,
        # tags/weight to remove, tags/weight that must be kept)
        jobs = []

        jobs.append(
            (HC.CONTENT_STATUS_CURRENT,
             cache_display_current_mappings_table_name,
             cache_current_mappings_table_name, current_implication_tag_ids,
             current_implication_tag_ids_weight,
             current_remaining_implication_tag_ids,
             current_remaining_implication_tag_ids_weight))
        jobs.append(
            (HC.CONTENT_STATUS_PENDING,
             cache_display_pending_mappings_table_name,
             cache_pending_mappings_table_name, pending_implication_tag_ids,
             pending_implication_tag_ids_weight,
             pending_remaining_implication_tag_ids,
             pending_remaining_implication_tag_ids_weight))

        for (status, cache_display_mappings_table_name,
             cache_mappings_table_name, removee_tag_ids,
             removee_tag_ids_weight, keep_tag_ids,
             keep_tag_ids_weight) in jobs:

            if removee_tag_ids_weight == 0:

                # nothing to remove, so nothing to do!

                continue

            # ultimately here, we are doing "delete all display mappings with hash_ids that have a storage mapping for a removee tag and no storage mappings for a keep tag
            # in order to reduce overhead, we go full meme and do a bunch of different situations

            with self._MakeTemporaryIntegerTable(
                [], 'tag_id') as temp_removee_tag_ids_table_name:

                with self._MakeTemporaryIntegerTable(
                    [], 'tag_id') as temp_keep_tag_ids_table_name:

                    if len(removee_tag_ids) == 1:

                        (removee_tag_id, ) = removee_tag_ids

                        hash_id_in_storage_remove = 'hash_id IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(
                            cache_mappings_table_name, removee_tag_id)

                    else:

                        self._ExecuteMany(
                            'INSERT INTO {} ( tag_id ) VALUES ( ? );'.format(
                                temp_removee_tag_ids_table_name),
                            ((removee_tag_id, )
                             for removee_tag_id in removee_tag_ids))

                        hash_id_in_storage_remove = 'hash_id IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(
                            temp_removee_tag_ids_table_name,
                            cache_mappings_table_name)

                    if keep_tag_ids_weight == 0:

                        # no surviving implications: remove unconditionally
                        predicates_phrase = hash_id_in_storage_remove

                    else:

                        # WARNING, WARNING: Big Brain Query, potentially great/awful
                        # note that in the 'clever/file join' situation, the number of total mappings is many, but we are deleting a few
                        # we want to precisely scan the status of the potential hashes to delete, not scan through them all to see what not to do
                        # therefore, we do NOT EXISTS, which just scans the parts, rather than NOT IN, which does the whole query and then checks against all results

                        if len(keep_tag_ids) == 1:

                            (keep_tag_id, ) = keep_tag_ids

                            if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(
                                    removee_tag_ids_weight,
                                    keep_tag_ids_weight):

                                hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id and tag_id = {} )'.format(
                                    cache_mappings_table_name,
                                    cache_display_mappings_table_name,
                                    cache_mappings_table_name, keep_tag_id)

                            else:

                                hash_id_not_in_storage_keep = 'hash_id NOT IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(
                                    cache_mappings_table_name, keep_tag_id)

                        else:

                            self._ExecuteMany(
                                'INSERT INTO {} ( tag_id ) VALUES ( ? );'.
                                format(temp_keep_tag_ids_table_name),
                                ((keep_tag_id, )
                                 for keep_tag_id in keep_tag_ids))

                            if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(
                                    removee_tag_ids_weight,
                                    keep_tag_ids_weight):

                                # (files to) mappings to temp tags
                                hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id )'.format(
                                    cache_mappings_table_name,
                                    temp_keep_tag_ids_table_name,
                                    cache_display_mappings_table_name,
                                    cache_mappings_table_name)

                            else:

                                # temp tags to mappings
                                hash_id_not_in_storage_keep = ' hash_id NOT IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(
                                    temp_keep_tag_ids_table_name,
                                    cache_mappings_table_name)

                        predicates_phrase = '{} AND {}'.format(
                            hash_id_in_storage_remove,
                            hash_id_not_in_storage_keep)

                    query = 'DELETE FROM {} WHERE tag_id = {} AND {};'.format(
                        cache_display_mappings_table_name, tag_id,
                        predicates_phrase)

                    self._Execute(query)

                    statuses_to_count_delta[status] = self._GetRowCount()

        current_delta = statuses_to_count_delta[HC.CONTENT_STATUS_CURRENT]
        pending_delta = statuses_to_count_delta[HC.CONTENT_STATUS_PENDING]

        if current_delta > 0 or pending_delta > 0:

            counts_cache_changes = ((tag_id, current_delta, pending_delta), )

            self.modules_mappings_counts_update.ReduceCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
 def AddMappings(self, tag_service_id, tag_id, hash_ids,
                 filtered_hashes_generator: FilteredHashesGenerator):
     """Make tag_id current for the given hashes in each specific storage cache.

     Any pending row for the same mapping is rescinded first (display cache
     before storage delete), any deleted-mapping record is removed, and the
     storage-domain counts and the specific display cache are updated.
     """

     for (file_service_id, filtered_hash_ids
          ) in filtered_hashes_generator.IterateHashes(hash_ids):

         (cache_current_mappings_table_name,
          cache_deleted_mappings_table_name,
          cache_pending_mappings_table_name
          ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
              file_service_id, tag_service_id)

         # we have to interleave this into the iterator so that if two siblings with the same ideal are pend->currented at once, we remain logic consistent for soletag lookups!
         self.modules_mappings_cache_specific_display.RescindPendingMappings(
             file_service_id, tag_service_id, tag_id, filtered_hash_ids)

         self._ExecuteMany(
             'DELETE FROM ' + cache_pending_mappings_table_name +
             ' WHERE hash_id = ? AND tag_id = ?;',
             ((hash_id, tag_id) for hash_id in filtered_hash_ids))

         num_pending_rescinded = self._GetRowCount()

         self._ExecuteMany(
             'INSERT OR IGNORE INTO ' + cache_current_mappings_table_name +
             ' ( hash_id, tag_id ) VALUES ( ?, ? );',
             ((hash_id, tag_id) for hash_id in filtered_hash_ids))

         num_current_inserted = self._GetRowCount()

         self._ExecuteMany(
             'DELETE FROM ' + cache_deleted_mappings_table_name +
             ' WHERE hash_id = ? AND tag_id = ?;',
             ((hash_id, tag_id) for hash_id in filtered_hash_ids))

         if num_current_inserted > 0:

             self.modules_mappings_counts_update.AddCounts(
                 ClientTags.TAG_DISPLAY_STORAGE, file_service_id,
                 tag_service_id, [(tag_id, num_current_inserted, 0)])

         if num_pending_rescinded > 0:

             self.modules_mappings_counts_update.ReduceCounts(
                 ClientTags.TAG_DISPLAY_STORAGE, file_service_id,
                 tag_service_id, [(tag_id, 0, num_pending_rescinded)])

         self.modules_mappings_cache_specific_display.AddMappings(
             file_service_id, tag_service_id, tag_id, filtered_hash_ids)
 def RegeneratePending(self, file_service_id, tag_service_id, status_hook=None):
     """Rebuild the pending side of this pair's specific storage cache.

     Wipes the cached pending mappings and counts, re-inserts pending rows
     limited to files actually in this file domain (tag by tag), then asks
     the specific display cache to regenerate its pending data too.
     """

     (current_mappings_table_name, deleted_mappings_table_name,
      pending_mappings_table_name, petitioned_mappings_table_name
      ) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)
     (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
      cache_pending_mappings_table_name
      ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
          file_service_id, tag_service_id)

     if status_hook is not None:

         status_hook('clearing old specific data')

     all_pending_storage_tag_ids = self._STS(
         self._Execute('SELECT DISTINCT tag_id FROM {};'.format(
             pending_mappings_table_name)))

     self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_STORAGE,
                                              file_service_id,
                                              tag_service_id,
                                              keep_current=True)

     self._Execute(
         'DELETE FROM {};'.format(cache_pending_mappings_table_name))

     counts_cache_changes = []

     num_to_do = len(all_pending_storage_tag_ids)

     select_table_join = self.modules_files_storage.GetTableJoinLimitedByFileDomain(
         file_service_id, pending_mappings_table_name,
         HC.CONTENT_STATUS_CURRENT)

     for (i, storage_tag_id) in enumerate(all_pending_storage_tag_ids):

         # only ping the UI every 100 tags to keep hook overhead down
         if i % 100 == 0 and status_hook is not None:

             status_hook('regenerating pending tags {}'.format(
                 HydrusData.ConvertValueRangeToPrettyString(
                     i + 1, num_to_do)))

         self._Execute(
             'INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT tag_id, hash_id FROM {} WHERE tag_id = ?;'
             .format(cache_pending_mappings_table_name, select_table_join),
             (storage_tag_id, ))

         pending_delta = self._GetRowCount()

         counts_cache_changes.append((storage_tag_id, 0, pending_delta))

     self.modules_mappings_counts_update.AddCounts(
         ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
         counts_cache_changes)

     self.modules_mappings_cache_specific_display.RegeneratePending(
         file_service_id, tag_service_id, status_hook=status_hook)
 def PendMappings(self, tag_service_id, tag_id, hash_ids,
                  filtered_hashes_generator: FilteredHashesGenerator):
     """Pend tag_id for the given hashes in each specific storage cache,
     updating storage-domain counts and the specific display cache."""

     for (file_service_id, filtered_hash_ids
          ) in filtered_hashes_generator.IterateHashes(hash_ids):

         (cache_current_mappings_table_name,
          cache_deleted_mappings_table_name,
          cache_pending_mappings_table_name
          ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
              file_service_id, tag_service_id)

         self._ExecuteMany(
             'INSERT OR IGNORE INTO ' + cache_pending_mappings_table_name +
             ' ( hash_id, tag_id ) VALUES ( ?, ? );',
             ((hash_id, tag_id) for hash_id in filtered_hash_ids))

         num_added = self._GetRowCount()

         if num_added > 0:

             self.modules_mappings_counts_update.AddCounts(
                 ClientTags.TAG_DISPLAY_STORAGE, file_service_id,
                 tag_service_id, [(tag_id, 0, num_added)])

         self.modules_mappings_cache_specific_display.PendMappings(
             file_service_id, tag_service_id, tag_id, filtered_hash_ids)
 def RescindPendingMappings(self, tag_service_id, tag_id, hash_ids,
                            filtered_hashes_generator: FilteredHashesGenerator):
     """Remove a pending tag_id from the given hashes in each specific storage cache.

     The display cache rescind runs first so its storage-based calculations
     see the pre-delete state, then the storage pending rows are deleted and
     the pending counts reduced by however many rows actually went away.
     """

     # fix: dropped the unused 'ac_counts = collections.Counter()' local that
     # was created per-service and never read or written

     for (file_service_id, filtered_hash_ids
          ) in filtered_hashes_generator.IterateHashes(hash_ids):

         (cache_current_mappings_table_name,
          cache_deleted_mappings_table_name,
          cache_pending_mappings_table_name
          ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
              file_service_id, tag_service_id)

         # display cache must rescind before the storage delete
         self.modules_mappings_cache_specific_display.RescindPendingMappings(
             file_service_id, tag_service_id, tag_id, filtered_hash_ids)

         self._ExecuteMany(
             'DELETE FROM ' + cache_pending_mappings_table_name +
             ' WHERE hash_id = ? AND tag_id = ?;',
             ((hash_id, tag_id) for hash_id in filtered_hash_ids))

         num_deleted = self._GetRowCount()

         if num_deleted > 0:

             counts_cache_changes = [(tag_id, 0, num_deleted)]

             self.modules_mappings_counts_update.ReduceCounts(
                 ClientTags.TAG_DISPLAY_STORAGE, file_service_id,
                 tag_service_id, counts_cache_changes)
    def Generate(self, tag_service_id):
        """
        Build the combined-files storage a/c count cache for one tag service
        from scratch, then generate the matching combined-files display cache.
        """

        self.modules_mappings_counts.CreateTables(
            ClientTags.TAG_DISPLAY_STORAGE,
            self.modules_services.combined_file_service_id, tag_service_id)

        #

        (current_mappings_table_name, deleted_mappings_table_name,
         pending_mappings_table_name, petitioned_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)

        def _has_any_rows(table_name):
            # one-row probe so an empty service can skip the whole tag scan
            return self._Execute('SELECT 1 FROM ' + table_name +
                                 ' LIMIT 1;').fetchone() is not None

        current_mappings_exist = _has_any_rows(current_mappings_table_name)
        pending_mappings_exist = _has_any_rows(pending_mappings_table_name)

        # not worth iterating through all known tags for an empty service
        if current_mappings_exist or pending_mappings_exist:

            count_query = 'SELECT tag_id, COUNT( * ) FROM {} CROSS JOIN {} USING ( tag_id ) GROUP BY ( tag_id );'

            # must be a cleverer way of doing this
            for (group_of_ids, num_done,
                 num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks(
                     self._c, 'SELECT tag_id FROM tags;', 10000):

                with self._MakeTemporaryIntegerTable(
                        group_of_ids, 'tag_id') as temp_table_name:

                    # temp tags to mappings
                    current_counts = collections.Counter(
                        dict(
                            self._Execute(
                                count_query.format(
                                    temp_table_name,
                                    current_mappings_table_name))))

                    # temp tags to mappings
                    pending_counts = collections.Counter(
                        dict(
                            self._Execute(
                                count_query.format(
                                    temp_table_name,
                                    pending_mappings_table_name))))

                tag_ids_with_counts = set(current_counts) | set(pending_counts)

                # Counter lookups default to 0 for tags seen in only one table
                counts_cache_changes = [(tag_id, current_counts[tag_id],
                                         pending_counts[tag_id])
                                        for tag_id in tag_ids_with_counts]

                if len(counts_cache_changes) > 0:

                    self.modules_mappings_counts_update.AddCounts(
                        ClientTags.TAG_DISPLAY_STORAGE,
                        self.modules_services.combined_file_service_id,
                        tag_service_id, counts_cache_changes)

        self.modules_mappings_cache_combined_files_display.Generate(
            tag_service_id)
 def AddFiles( self, file_service_id, tag_service_id, hash_ids, hash_ids_table_name ):
     """
     Pull a group of files' mappings into this ( file service, tag service ) specific
     storage cache.
     
     hash_ids_table_name is an existing table holding the same hash_ids; it is joined
     against the master mappings tables to copy the files' deleted, current and pending
     mappings into the cache tables, and the a/c counts are updated with what was
     actually inserted.
     
     Fix: the deleted-mappings INSERT previously selected 'tag_id, hash_id' against a
     '( hash_id, tag_id )' column list, which crossed the two ids; the SELECT order now
     matches the column list, consistent with every other insert in this module.
     """
     
     ( cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames( file_service_id, tag_service_id )
     
     ( current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames( tag_service_id )
     
     # deleted don't have a/c counts to update, so we can do it all in one go here
     self._Execute( 'INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( cache_deleted_mappings_table_name, hash_ids_table_name, deleted_mappings_table_name ) )
     
     # temp hashes to mappings
     current_mapping_ids_raw = self._Execute( 'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( hash_ids_table_name, current_mappings_table_name ) ).fetchall()
     
     current_mapping_ids_dict = HydrusData.BuildKeyToSetDict( current_mapping_ids_raw )
     
     # temp hashes to mappings
     pending_mapping_ids_raw = self._Execute( 'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( hash_ids_table_name, pending_mappings_table_name ) ).fetchall()
     
     pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict( pending_mapping_ids_raw )
     
     all_ids_seen = set( current_mapping_ids_dict.keys() )
     all_ids_seen.update( pending_mapping_ids_dict.keys() )
     
     counts_cache_changes = []
     
     for tag_id in all_ids_seen:
         
         current_hash_ids = current_mapping_ids_dict[ tag_id ]
         
         current_delta = len( current_hash_ids )
         
         if current_delta > 0:
             
             self._ExecuteMany( 'INSERT OR IGNORE INTO ' + cache_current_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ( ( hash_id, tag_id ) for hash_id in current_hash_ids ) )
             
             # count only rows actually new to the cache (OR IGNORE may skip some)
             current_delta = self._GetRowCount()
             
         
         #
         
         pending_hash_ids = pending_mapping_ids_dict[ tag_id ]
         
         pending_delta = len( pending_hash_ids )
         
         if pending_delta > 0:
             
             self._ExecuteMany( 'INSERT OR IGNORE INTO ' + cache_pending_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ( ( hash_id, tag_id ) for hash_id in pending_hash_ids ) )
             
             pending_delta = self._GetRowCount()
             
         
         #
         
         if current_delta > 0 or pending_delta > 0:
             
             counts_cache_changes.append( ( tag_id, current_delta, pending_delta ) )
             
         
     
     if len( counts_cache_changes ) > 0:
         
         self.modules_mappings_counts_update.AddCounts( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes )
Пример #18
0
    def RegeneratePending(self,
                          file_service_id,
                          tag_service_id,
                          status_hook=None):
        """
        Rebuild the specific display pending-mappings cache (and its pending a/c
        counts) for one ( file service, tag service ) pair from the specific
        storage pending cache.

        status_hook, if given, is called with short progress strings.
        """

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
         cache_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
             file_service_id, tag_service_id)
        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        if status_hook is not None:

            message = 'clearing old specific display data'

            status_hook(message)

        # every storage tag that currently has a pending mapping in this domain
        all_pending_storage_tag_ids = self._STS(
            self._Execute('SELECT DISTINCT tag_id FROM {};'.format(
                cache_pending_mappings_table_name)))

        storage_tag_ids_to_display_tag_ids = self.modules_tag_display.GetTagsToImplies(
            ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id,
            all_pending_storage_tag_ids)

        # the display tags those storage tags map to under the sibling/parent rules
        all_pending_display_tag_ids = set(
            itertools.chain.from_iterable(
                storage_tag_ids_to_display_tag_ids.values()))

        # release the big intermediates before the rebuild loop
        del all_pending_storage_tag_ids
        del storage_tag_ids_to_display_tag_ids

        # wipe pending counts (keep_current=True preserves the current counts)
        self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_ACTUAL,
                                                 file_service_id,
                                                 tag_service_id,
                                                 keep_current=True)

        self._Execute('DELETE FROM {};'.format(
            cache_display_pending_mappings_table_name))

        # invert: for each display tag, which storage tags imply it
        all_pending_display_tag_ids_to_implied_by_storage_tag_ids = self.modules_tag_display.GetTagsToImpliedBy(
            ClientTags.TAG_DISPLAY_ACTUAL,
            tag_service_id,
            all_pending_display_tag_ids,
            tags_are_ideal=True)

        counts_cache_changes = []

        num_to_do = len(
            all_pending_display_tag_ids_to_implied_by_storage_tag_ids)

        for (i, (display_tag_id, storage_tag_ids)) in enumerate(
                all_pending_display_tag_ids_to_implied_by_storage_tag_ids.
                items()):

            if i % 100 == 0 and status_hook is not None:

                message = 'regenerating pending tags {}'.format(
                    HydrusData.ConvertValueRangeToPrettyString(
                        i + 1, num_to_do))

                status_hook(message)

            if len(storage_tag_ids) == 1:

                # single implier: copy its pending rows directly, no temp table needed
                (storage_tag_id, ) = storage_tag_ids

                self._Execute(
                    'INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT ?, hash_id FROM {} WHERE tag_id = ?;'
                    .format(cache_display_pending_mappings_table_name,
                            cache_pending_mappings_table_name),
                    (display_tag_id, storage_tag_id))

                pending_delta = self._GetRowCount()

            else:

                with self._MakeTemporaryIntegerTable(
                        storage_tag_ids, 'tag_id') as temp_tag_ids_table_name:

                    # temp tags to mappings merged
                    self._Execute(
                        'INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT DISTINCT ?, hash_id FROM {} CROSS JOIN {} USING ( tag_id );'
                        .format(cache_display_pending_mappings_table_name,
                                temp_tag_ids_table_name,
                                cache_pending_mappings_table_name),
                        (display_tag_id, ))

                    pending_delta = self._GetRowCount()

            # ( tag_id, current_delta, pending_delta ) — only pending changes here
            counts_cache_changes.append((display_tag_id, 0, pending_delta))

        self.modules_mappings_counts_update.AddCounts(
            ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
            counts_cache_changes)
Пример #19
0
    def AddFiles(self, file_service_id, tag_service_id, hash_ids,
                 hash_ids_table_name):
        """
        Propagate a group of files' specific storage cache mappings into the
        specific display cache for the same ( file service, tag service ) pair,
        expanding storage tags into the display tags they imply and updating the
        display a/c counts.

        hash_ids_table_name is an existing table holding the same hash_ids.
        """

        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
         cache_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
             file_service_id, tag_service_id)

        # temp hashes to mappings
        storage_current_mapping_ids_raw = self._Execute(
            'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.
            format(hash_ids_table_name,
                   cache_current_mappings_table_name)).fetchall()

        # tag_id -> set of hash_ids currently mapped in storage
        storage_current_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
            storage_current_mapping_ids_raw)

        # temp hashes to mappings
        storage_pending_mapping_ids_raw = self._Execute(
            'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.
            format(hash_ids_table_name,
                   cache_pending_mappings_table_name)).fetchall()

        storage_pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict(
            storage_pending_mapping_ids_raw)

        all_storage_tag_ids = set(storage_current_mapping_ids_dict.keys())
        all_storage_tag_ids.update(storage_pending_mapping_ids_dict.keys())

        storage_tag_ids_to_implies_tag_ids = self.modules_tag_display.GetTagsToImplies(
            ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_storage_tag_ids)

        # invert the implication map: display tag -> storage tags that imply it
        display_tag_ids_to_implied_by_tag_ids = collections.defaultdict(set)

        for (storage_tag_id,
             implies_tag_ids) in storage_tag_ids_to_implies_tag_ids.items():

            for implies_tag_id in implies_tag_ids:

                display_tag_ids_to_implied_by_tag_ids[implies_tag_id].add(
                    storage_tag_id)

        counts_cache_changes = []

        # for all display tags implied by the existing storage mappings, add them
        # btw, when we add files to a specific domain, we know that all inserts are new

        for (display_tag_id, implied_by_tag_ids
             ) in display_tag_ids_to_implied_by_tag_ids.items():

            # union of hash_ids over every storage tag implying this display tag
            display_current_hash_ids = set(
                itertools.chain.from_iterable(
                    (storage_current_mapping_ids_dict[implied_by_tag_id]
                     for implied_by_tag_id in implied_by_tag_ids)))

            # len() is the true delta here because all inserts are new (see above)
            current_delta = len(display_current_hash_ids)

            if current_delta > 0:

                self._ExecuteMany(
                    'INSERT OR IGNORE INTO ' +
                    cache_display_current_mappings_table_name +
                    ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                    ((hash_id, display_tag_id)
                     for hash_id in display_current_hash_ids))

            #

            display_pending_hash_ids = set(
                itertools.chain.from_iterable(
                    (storage_pending_mapping_ids_dict[implied_by_tag_id]
                     for implied_by_tag_id in implied_by_tag_ids)))

            pending_delta = len(display_pending_hash_ids)

            if pending_delta > 0:

                self._ExecuteMany(
                    'INSERT OR IGNORE INTO ' +
                    cache_display_pending_mappings_table_name +
                    ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                    ((hash_id, display_tag_id)
                     for hash_id in display_pending_hash_ids))

            #

            if current_delta > 0 or pending_delta > 0:

                counts_cache_changes.append(
                    (display_tag_id, current_delta, pending_delta))

        if len(counts_cache_changes) > 0:

            self.modules_mappings_counts_update.AddCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Пример #20
0
    def RescindPendingMappings(self, file_service_id, tag_service_id,
                               storage_tag_id, hash_ids):
        """
        Remove the display-cache pending rows that were justified by a storage
        tag whose pending mapping is being rescinded, for the given hash_ids.

        A display row is only deleted when no *other* storage tag still implies
        it for that hash; the display a/c pending counts are reduced by what was
        actually deleted.
        """

        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        implies_tag_ids = self.modules_tag_display.GetImplies(
            ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, storage_tag_id)

        # for each affected display tag, the full set of storage tags implying it
        implies_tag_ids_to_implied_by_tag_ids = self.modules_tag_display.GetTagsToImpliedBy(
            ClientTags.TAG_DISPLAY_ACTUAL,
            tag_service_id,
            implies_tag_ids,
            tags_are_ideal=True)

        # display_tag_id -> number of rows actually deleted
        ac_counts = collections.Counter()

        for (display_tag_id, implied_by_tag_ids
             ) in implies_tag_ids_to_implied_by_tag_ids.items():

            # for every tag implied by the storage tag being removed

            other_implied_by_tag_ids = set(implied_by_tag_ids)
            other_implied_by_tag_ids.discard(storage_tag_id)

            if len(other_implied_by_tag_ids) == 0:

                # nothing else implies this tag on display, so can just straight up delete

                self._ExecuteMany(
                    'DELETE FROM {} WHERE tag_id = ? AND hash_id = ?;'.format(
                        cache_display_pending_mappings_table_name),
                    ((display_tag_id, hash_id) for hash_id in hash_ids))

                num_rescinded = self._GetRowCount()

            else:

                # other things imply this tag on display, so we need to check storage to see what else has it
                statuses_to_table_names = self.modules_mappings_storage.GetFastestStorageMappingTableNames(
                    file_service_id, tag_service_id)

                mappings_table_name = statuses_to_table_names[
                    HC.CONTENT_STATUS_PENDING]

                with self._MakeTemporaryIntegerTable(
                        other_implied_by_tag_ids, 'tag_id') as temp_table_name:

                    # storage mappings to temp other tag ids
                    # delete mappings where it shouldn't exist for other reasons lad
                    delete = 'DELETE FROM {} WHERE tag_id = ? AND hash_id = ? AND NOT EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE hash_id = ? )'.format(
                        cache_display_pending_mappings_table_name,
                        mappings_table_name, temp_table_name)

                    self._ExecuteMany(delete,
                                      ((display_tag_id, hash_id, hash_id)
                                       for hash_id in hash_ids))

                    num_rescinded = self._GetRowCount()

            if num_rescinded > 0:

                ac_counts[display_tag_id] += num_rescinded

        if len(ac_counts) > 0:

            # ( tag_id, current_delta, pending_delta ) — only pending changed here
            counts_cache_changes = [(tag_id, 0, pending_delta)
                                    for (tag_id,
                                         pending_delta) in ac_counts.items()]

            self.modules_mappings_counts_update.ReduceCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Пример #21
0
 def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
     """
     Return the set of hash_ids that have any of the given tag_ids in the
     mapping tables selected by the display type, file domain and search context.
     
     If hash_ids and hash_ids_table_name are supplied, a join against that file
     table may be used when the count estimate says it is faster. job_key, if
     given, lets the cursor reads be cancelled.
     """
     
     cancelled_hook = job_key.IsCancelled if job_key is not None else None
     
     do_hash_table_join = False
     
     if hash_ids_table_name is not None and hash_ids is not None:
         
         tag_service_id = self.modules_services.GetServiceId( tag_search_context.service_key )
         file_service_id = self.modules_services.GetServiceId( file_service_key )
         
         estimated_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_search_context.include_current_tags, tag_search_context.include_pending_tags )
         
         do_hash_table_join = bool( ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_count ) )
         
     
     table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_search_context )
     
     result_hash_ids = set()
     
     def _gather( cursor ):
         
         # stream in chunks so a cancelled job stops early
         result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )
         
     
     if len( tag_ids ) == 1:
         
         ( tag_id, ) = tag_ids
         
         if do_hash_table_join:
             
             # temp hashes to mappings
             query_template = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'
             
             queries = [ query_template.format( hash_ids_table_name, table_name ) for table_name in table_names ]
             
         else:
             
             queries = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( table_name ) for table_name in table_names ]
             
         
         for query in queries:
             
             _gather( self._Execute( query, ( tag_id, ) ) )
             
         
     else:
         
         with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
             
             if do_hash_table_join:
                 
                 # temp hashes to mappings to temp tags
                 # old method, does not do EXISTS efficiently, it makes a list instead and checks that
                 # queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, table_name, temp_tag_ids_table_name, table_name, hash_ids_table_name ) for table_name in table_names ]
                 # new method, this seems to actually do the correlated scalar subquery, although it does seem to be sqlite voodoo
                 queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'.format( hash_ids_table_name, table_name, table_name, hash_ids_table_name, temp_tag_ids_table_name, table_name, temp_tag_ids_table_name ) for table_name in table_names ]
                 
             else:
                 
                 # temp tags to mappings
                 queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, table_name ) for table_name in table_names ]
                 
             
             for query in queries:
                 
                 _gather( self._Execute( query ) )
                 
             
         
     
     return result_hash_ids