Python HydrusDB.ReadFromCancellableCursor примеры использования

Язык программирования: Python

Пространство имен/Пакет: hydrus.core

Класс/Тип: HydrusDB

Метод/Функция: ReadFromCancellableCursor

Примеров на hotexamples.com: 4

Python HydrusDB.ReadFromCancellableCursor - 4 примера найдено. Это лучшие примеры Python кода для hydrus.core.HydrusDB.ReadFromCancellableCursor, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

TemporaryIntegerTable(14)

GetRowCount(6)

ReadFromCancellableCursor(4)

ReadLargeIdQueryInSeparateChunks(3)

Пример #1

Показать файл

 def GetHashIdsThatHaveTagsSimpleLocation( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, namespace_ids_table_name = None, hash_ids_table_name = None, job_key = None ):
     
     mapping_and_tag_table_names = self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_search_context )
     
     if hash_ids_table_name is None:
         
         if namespace_ids_table_name is None:
             
             # hellmode
             queries = [ 'SELECT DISTINCT hash_id FROM {};'.format( mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
             
         else:
             
             # temp namespaces to tags to mappings
             queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( namespace_id ) CROSS JOIN {} USING ( tag_id );'.format( namespace_ids_table_name, tags_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
             
         
     else:
         
         if namespace_ids_table_name is None:
             
             queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, mappings_table_name, mappings_table_name, hash_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
             
         else:
             
             # temp hashes to mappings to tags to temp namespaces
             # this was originally a 'WHERE EXISTS' thing, but doing that on a three way cross join is too complex for that to work well
             # let's hope DISTINCT can save time too
             queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id );'.format( hash_ids_table_name, mappings_table_name, tags_table_name, namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
             
         
     
     cancelled_hook = None
     
     if job_key is not None:
         
         cancelled_hook = job_key.IsCancelled
         
     
     nonzero_tag_hash_ids = set()
     
     for query in queries:
         
         cursor = self._Execute( query )
         
         nonzero_tag_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 10240, cancelled_hook ) ) )
         
         if job_key is not None and job_key.IsCancelled():
             
             return set()
             
         
     
     return nonzero_tag_hash_ids

Пример #2

Показать файл

    def GetSubtagIdsFromWildcardIntoTable(self,
                                          file_service_id: int,
                                          tag_service_id: int,
                                          subtag_wildcard,
                                          subtag_id_table_name,
                                          job_key=None):

        if tag_service_id == self.modules_services.combined_tag_service_id:

            search_tag_service_ids = self.modules_services.GetServiceIds(
                HC.REAL_TAG_SERVICES)

        else:

            search_tag_service_ids = (tag_service_id, )

        for search_tag_service_id in search_tag_service_ids:

            if '*' in subtag_wildcard:

                subtags_fts4_table_name = self.GetSubtagsFTS4TableName(
                    file_service_id, search_tag_service_id)

                wildcard_has_fts4_searchable_characters = WildcardHasFTS4SearchableCharacters(
                    subtag_wildcard)

                if subtag_wildcard == '*':

                    # hellmode, but shouldn't be called normally
                    cursor = self._Execute('SELECT docid FROM {};'.format(
                        subtags_fts4_table_name))

                elif ClientSearch.IsComplexWildcard(
                        subtag_wildcard
                ) or not wildcard_has_fts4_searchable_characters:

                    # FTS4 does not support complex wildcards, so instead we'll search our raw subtags
                    # however, since we want to search 'searchable' text, we use the 'searchable subtags map' to cross between real and searchable

                    like_param = ConvertWildcardToSQLiteLikeParameter(
                        subtag_wildcard)

                    if subtag_wildcard.startswith(
                            '*'
                    ) or not wildcard_has_fts4_searchable_characters:

                        # this is a SCAN, but there we go
                        # a potential optimisation here, in future, is to store fts4 of subtags reversed, then for '*amus', we can just search that reverse cache for 'suma*'
                        # and this would only double the size of the fts4 cache, the largest cache in the whole db! a steal!
                        # it also would not fix '*amu*', but with some cleverness could speed up '*amus ar*'

                        query = 'SELECT docid FROM {} WHERE subtag LIKE ?;'.format(
                            subtags_fts4_table_name)

                        cursor = self._Execute(query, (like_param, ))

                    else:

                        # we have an optimisation here--rather than searching all subtags for bl*ah, let's search all the bl* subtags for bl*ah!

                        prefix_fts4_wildcard = subtag_wildcard.split('*')[0]

                        prefix_fts4_wildcard_param = '"{}*"'.format(
                            prefix_fts4_wildcard)

                        query = 'SELECT docid FROM {} WHERE subtag MATCH ? AND subtag LIKE ?;'.format(
                            subtags_fts4_table_name)

                        cursor = self._Execute(
                            query, (prefix_fts4_wildcard_param, like_param))

                else:

                    # we want the " " wrapping our search text to keep whitespace words connected and in order
                    # "samus ar*" should not match "around samus"

                    # simple 'sam*' style subtag, so we can search fts4 no prob

                    subtags_fts4_param = '"{}"'.format(subtag_wildcard)

                    cursor = self._Execute(
                        'SELECT docid FROM {} WHERE subtag MATCH ?;'.format(
                            subtags_fts4_table_name), (subtags_fts4_param, ))

                cancelled_hook = None

                if job_key is not None:

                    cancelled_hook = job_key.IsCancelled

                loop_of_subtag_id_tuples = HydrusDB.ReadFromCancellableCursor(
                    cursor, 1024, cancelled_hook=cancelled_hook)

                self._ExecuteMany(
                    'INSERT OR IGNORE INTO {} ( subtag_id ) VALUES ( ? );'.
                    format(subtag_id_table_name), loop_of_subtag_id_tuples)

            else:

                # old notes from before we had searchable subtag map. I deleted that map once, albeit in an older and less efficient form. *don't delete it again, it has use*
                #
                # NOTE: doing a subtag = 'blah' lookup on subtags_fts4 tables is ultra slow, lmao!
                # attempts to match '/a/' to 'a' with clever FTS4 MATCHing (i.e. a MATCH on a*\b, then an '= a') proved not super successful
                # in testing, it was still a bit slow. my guess is it is still iterating through all the nodes for ^a*, the \b just makes it a bit more efficient sometimes
                # in tests '^a\b' was about twice as fast as 'a*', so the \b might not even be helping at all
                # so, I decided to move back to a lean and upgraded searchable subtag map, and here we are

                searchable_subtag = subtag_wildcard

                if self.modules_tags.SubtagExists(searchable_subtag):

                    searchable_subtag_id = self.modules_tags.GetSubtagId(
                        searchable_subtag)

                    self._Execute(
                        'INSERT OR IGNORE INTO {} ( subtag_id ) VALUES ( ? );'.
                        format(subtag_id_table_name), (searchable_subtag_id, ))

                    subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName(
                        file_service_id, search_tag_service_id)

                    self._Execute(
                        'INSERT OR IGNORE INTO {} ( subtag_id ) SELECT subtag_id FROM {} WHERE searchable_subtag_id = ?;'
                        .format(subtag_id_table_name,
                                subtags_searchable_map_table_name),
                        (searchable_subtag_id, ))

            if job_key is not None and job_key.IsCancelled():

                self._Execute('DELETE FROM {};'.format(subtag_id_table_name))

                return

Пример #3

Показать файл

 def GetHashIdsAndNonZeroTagCounts( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_search_context: ClientSearch.TagSearchContext, hash_ids, namespace_wildcard = None, job_key = None ):
     
     if namespace_wildcard == '*':
         
         namespace_wildcard = None
         
     
     if namespace_wildcard is None:
         
         namespace_ids = []
         
     else:
         
         namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )
         
     
     with self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
         
         ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
         
         mapping_and_tag_table_names = set()
         
         for file_service_key in file_service_keys:
             
             mapping_and_tag_table_names.update( self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_search_context ) )
             
         
         # reason why I (JOIN each table) rather than (join the UNION) is based on previous hell with having query planner figure out a "( a UNION b UNION c ) NATURAL JOIN stuff" situation
         # although the following sometimes makes certifiable 2KB ( 6 UNION * 4-table ) queries, it actually works fast
         
         # OK, a new problem is mass UNION leads to terrible cancelability because the first row cannot be fetched until the first n - 1 union queries are done
         # I tried some gubbins to try to do a pseudo table-union rather than query union and do 'get files->distinct tag count for this union of tables, and fetch hash_ids first on the union', but did not have luck
         
         # so NOW we are just going to do it in bits of files mate. this also reduces memory use from the distinct-making UNION with large numbers of hash_ids
         
         results = []
         
         BLOCK_SIZE = max( 64, int( len( hash_ids ) ** 0.5 ) ) # go for square root for now
         
         for group_of_hash_ids in HydrusData.SplitIteratorIntoChunks( hash_ids, BLOCK_SIZE ):
             
             with self._MakeTemporaryIntegerTable( group_of_hash_ids, 'hash_id' ) as hash_ids_table_name:
                 
                 if namespace_wildcard is None:
                     
                     # temp hashes to mappings
                     select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id )'.format( hash_ids_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
                     
                 else:
                     
                     # temp hashes to mappings to tags to namespaces
                     select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id )'.format( hash_ids_table_name, mappings_table_name, tags_table_name, temp_namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
                     
                 
                 unions = '( {} )'.format( ' UNION '.join( select_statements ) )
                 
                 query = 'SELECT hash_id, COUNT( tag_id ) FROM {} GROUP BY hash_id;'.format( unions )
                 
                 cursor = self._Execute( query )
                 
                 cancelled_hook = None
                 
                 if job_key is not None:
                     
                     cancelled_hook = job_key.IsCancelled
                     
                 
                 loop_of_results = HydrusDB.ReadFromCancellableCursor( cursor, 64, cancelled_hook = cancelled_hook )
                 
                 if job_key is not None and job_key.IsCancelled():
                     
                     return results
                     
                 
                 results.extend( loop_of_results )
                 
             
         
         return results

Пример #4

Показать файл

 def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
     
     do_hash_table_join = False
     
     if hash_ids_table_name is not None and hash_ids is not None:
         
         tag_service_id = self.modules_services.GetServiceId( tag_search_context.service_key )
         file_service_id = self.modules_services.GetServiceId( file_service_key )
         
         estimated_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_search_context.include_current_tags, tag_search_context.include_pending_tags )
         
         if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_count ):
             
             do_hash_table_join = True
             
         
     
     result_hash_ids = set()
     
     table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_search_context )
     
     cancelled_hook = None
     
     if job_key is not None:
         
         cancelled_hook = job_key.IsCancelled
         
     
     if len( tag_ids ) == 1:
         
         ( tag_id, ) = tag_ids
         
         if do_hash_table_join:
             
             # temp hashes to mappings
             queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'.format( hash_ids_table_name, table_name ) for table_name in table_names ]
             
         else:
             
             queries = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( table_name ) for table_name in table_names ]
             
         
         for query in queries:
             
             cursor = self._Execute( query, ( tag_id, ) )
             
             result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )
             
         
     else:
         
         with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
             
             if do_hash_table_join:
                 
                 # temp hashes to mappings to temp tags
                 # old method, does not do EXISTS efficiently, it makes a list instead and checks that
                 # queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, table_name, temp_tag_ids_table_name, table_name, hash_ids_table_name ) for table_name in table_names ]
                 # new method, this seems to actually do the correlated scalar subquery, although it does seem to be sqlite voodoo
                 queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'.format( hash_ids_table_name, table_name, table_name, hash_ids_table_name, temp_tag_ids_table_name, table_name, temp_tag_ids_table_name ) for table_name in table_names ]
                 
             else:
                 
                 # temp tags to mappings
                 queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, table_name ) for table_name in table_names ]
                 
             
             for query in queries:
                 
                 cursor = self._Execute( query )
                 
                 result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )
                 
             
         
     
     return result_hash_ids