Exemplo n.º 1
0
def __find_best_series(book, config):
    '''
    Queries the database to find a best guess for a series matching the given
    ComicBook, based on its name, year, issue number, and other text attributes.

    The SeriesRefs found for the book's series name are first filtered using
    the ignore settings in 'config', and the remaining ones are ranked with
    MatchScore.  If the book is a first (or unknown) issue and the cover of
    the best match is too similar to the cover of the second or third best
    match, no guess is made at all.

    book   -- the ComicBook to find a series for
    config -- the settings object providing the ignored search terms,
              ignored publishers, year limits, and never-ignore threshold

    Returns SeriesRef if a reasonable guess was found, or None if one wasn't.
    '''

    # 1. obtain SeriesRefs for this book, removing some as dictated by prefs
    series_refs = db.query_series_refs(book.series_s,
                                       config.ignored_searchterms_sl)
    series_refs = dbutils.filter_series_refs(series_refs,
                                             config.ignored_publishers_sl,
                                             config.ignored_before_year_n,
                                             config.ignored_after_year_n,
                                             config.never_ignore_threshold_n)
    if len(series_refs) == 0:
        return None

    # 2. obtain the first, second, and third best matching SeriesRefs for the
    #    given book, if there are any.
    mscore = MatchScore()

    def find_best_score(refs):
        # max() scores every candidate exactly once and returns the earliest
        # candidate when several share the top score.  it needs no import,
        # unlike reduce(), which is not a builtin on python 3.
        if not refs:
            return None
        return max(refs, key=lambda ref: mscore.compute_n(book, ref))

    secondary = None
    tertiary = None
    primary = find_best_score(series_refs)
    if primary:
        # each winner is removed so the next search only sees the runners-up
        series_refs.remove(primary)
        secondary = find_best_score(series_refs)
        if secondary:
            series_refs.remove(secondary)
            tertiary = find_best_score(series_refs)

    # 3. if our book is the first (or unknown) issue, figure out if the best
    #    matching series has a similar cover to the second or third best.
    #    if it does, we're probably dealing with a trade paperback and a
    #    regular issue, and we can't find the best series reliably, so we bail
    issue_num_s = book.issue_num_s
    is_first_issue = not issue_num_s or \
        (utils.is_number(issue_num_s) and float(issue_num_s) == 1.0)
    if is_first_issue and primary and secondary:
        # covers count as "too similar" slightly below the regular threshold
        SIMILARITY_THRESHOLD = __MATCH_THRESHOLD - 0.10
        hash1 = __get_remote_hash(primary)
        hash2 = __get_remote_hash(secondary)
        if imagehash.similarity(hash1, hash2) > SIMILARITY_THRESHOLD:
            return None
        # the third best cover is only fetched if the second one didn't match
        if tertiary:
            hash3 = __get_remote_hash(tertiary)
            if imagehash.similarity(hash1, hash3) > SIMILARITY_THRESHOLD:
                return None

    return primary
Exemplo n.º 2
0
def __find_best_series(book, config):
   '''
   Queries the database to find a best guess for a series matching the given
   ComicBook, based on its name, year, issue number, and other text attributes.

   The SeriesRefs found for the book's series name are first filtered using
   the ignore settings in 'config', and the remaining ones are ranked with
   MatchScore.  If the book is a first (or unknown) issue and the cover of
   the best match is too similar to the cover of the second or third best
   match, no guess is made at all.

   book   -- the ComicBook to find a series for
   config -- the settings object providing the ignored search terms,
             ignored publishers, year limits, and never-ignore threshold

   Returns SeriesRef if a reasonable guess was found, or None if one wasn't.
   '''

   # 1. obtain SeriesRefs for this book, removing some as dictated by prefs
   series_refs = db.query_series_refs(
      book.series_s, config.ignored_searchterms_sl)
   series_refs = dbutils.filter_series_refs(
      series_refs,
      config.ignored_publishers_sl,
      config.ignored_before_year_n,
      config.ignored_after_year_n,
      config.never_ignore_threshold_n)
   if len(series_refs) == 0:
      return None

   # 2. obtain the first, second, and third best matching SeriesRefs for the
   #    given book, if there are any.
   mscore = MatchScore()

   def find_best_score(refs):
      # max() scores every candidate exactly once and returns the earliest
      # candidate when several share the top score.  it needs no import,
      # unlike reduce(), which is not a builtin on python 3.
      if not refs:
         return None
      return max(refs, key=lambda ref: mscore.compute_n(book, ref))

   secondary = None
   tertiary = None
   primary = find_best_score(series_refs)
   if primary:
      # each winner is removed so the next search only sees the runners-up
      series_refs.remove(primary)
      secondary = find_best_score(series_refs)
      if secondary:
         series_refs.remove(secondary)
         tertiary = find_best_score(series_refs)

   # 3. if our book is the first (or unknown) issue, figure out if the best
   #    matching series has a similar cover to the second or third best.
   #    if it does, we're probably dealing with a trade paperback and a
   #    regular issue, and we can't find the best series reliably, so we bail
   issue_num_s = book.issue_num_s
   is_first_issue = not issue_num_s or \
      (utils.is_number(issue_num_s) and float(issue_num_s) == 1.0)
   if is_first_issue and primary and secondary:
      # covers count as "too similar" slightly below the regular threshold
      SIMILARITY_THRESHOLD = __MATCH_THRESHOLD - 0.10
      hash1 = __get_remote_hash(primary)
      hash2 = __get_remote_hash(secondary)
      if imagehash.similarity(hash1, hash2) > SIMILARITY_THRESHOLD:
         return None
      # the third best cover is only fetched if the second one didn't match
      if tertiary:
         hash3 = __get_remote_hash(tertiary)
         if imagehash.similarity(hash1, hash3) > SIMILARITY_THRESHOLD:
            return None

   return primary
Exemplo n.º 3
0
 def are_the_same(hash1, hash2):
     '''
     Returns True if the similarity of the two given image hashes, as
     computed by imagehash.similarity, is greater than __MATCH_THRESHOLD.
     '''
     return imagehash.similarity(hash1, hash2) > __MATCH_THRESHOLD
Exemplo n.º 4
0
 def are_the_same(hash1, hash2):
    ''' Compares two image hashes; True means they exceed the threshold. '''
    # the hashes only count as "the same" when their similarity score is
    # strictly greater than the match threshold
    similarity_n = imagehash.similarity(hash1, hash2)
    return similarity_n > __MATCH_THRESHOLD