Exemplo n.º 1
0
 def localTest_parsePosterThumbnailData_None(self):
   """ Checks that translit.detranslify() converts Latin-transliterated
       strings into the expected Cyrillic text (compared as UTF-8 bytes).
   """
   failureMessage = 'Wrong translitirated string'
   # (Latin input, expected Cyrillic output) pairs, checked in order.
   samples = (
     ('Operatsiya Y i drugie priklyucheniya Shurika',
      'Операция Ы и другие приключения Шурика'),
     ('D\'Artanyan i tri mushketyora[kinokopilka].torrent',
      'Д‘Артанян и три мушкетёра[кинокопилка].торрент'),
   )
   for latinStr, expected in samples:
     actual = translit.detranslify(latinStr).encode('utf8')
     self._assertEquals(expected, actual, failureMessage)
Exemplo n.º 2
0
def score_title(entry, fileyear, medianame, idx):
    """Score how well a search result matches a local media item.

    Starting from a base of 90 the function applies, in order: a result
    position bonus/penalty, a year bonus/penalty, the lowest title penalty
    found among the Russian/English titles and their detranslified variants,
    a penalty when ``check_main_poster(entry['id'])`` returns True, and a
    small bonus for the first result when its score is still low.

    :param entry: search result mapping; reads ``'nameRU'`` and ``'id'``,
        and optionally ``'year'`` and ``'nameEN'``.
    :param fileyear: year taken from the file; falsy when unknown.
    :param medianame: title taken from the file name.
    :param idx: position of ``entry`` in the search results.
    :return: the computed score, capped at 100.
    """
    score = 90
    Log.Debug('### Start scoring %s (%s) <-> %s (%s)  idx = %s', medianame, fileyear, entry['nameRU'], entry['year'], idx)

    # Result position bonus/penalty.
    # NOTE(review): the bonus is keyed on idx == 1 while the first-result
    # boost near the end uses idx == 0 - confirm the intended index base.
    if Prefs['search.trust_kp'] is True and idx == 1:
        score += 10
    else:
        score -= idx * const.SCORE_PENALTY_ITEM_ORDER

    # Default penalty used when either year is unknown.
    yearpenalty = const.SCORE_PENALTY_YEAR / 3
    mediayear = int(fileyear or 0)
    # Keep only the digits of the entry year; a missing/empty year becomes 0.
    year = int(re.sub('[^0-9]', '', entry.get('year') or '0') or '0')
    if mediayear != 0 and year != 0:
        yeardiff = abs(mediayear - year)
        if yeardiff < 1:
            # Exact year match: reward instead of penalizing.
            score += 10
            yearpenalty = 0
        elif yeardiff == 1:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 4)
        elif yeardiff == 2:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 3)
        else:
            yearpenalty = yeardiff * int(const.SCORE_PENALTY_YEAR / 2)

    score -= yearpenalty

    # The cleaned Russian title is needed for both comparisons below.
    cleared_title = clear_title(entry['nameRU'])
    titlepenalty = compute_title_penalty(medianame, cleared_title)
    alttitlepenalty = 100
    if 'nameEN' in entry:
        alttitlepenalty = compute_title_penalty(medianame, entry['nameEN'])

    Log.Debug('yearpenalty = %s, titlepenalty = %s, alttitlepenalty = %s', yearpenalty, titlepenalty, alttitlepenalty)

    # Best effort: retry the comparisons with the detranslified media name
    # and keep whichever penalty is lowest.
    try:
        detranslifiedmedianame = translit.detranslify(medianame)
        detranslifiedtitlepenalty = compute_title_penalty(detranslifiedmedianame, cleared_title)
        titlepenalty = min(detranslifiedtitlepenalty, titlepenalty)

        if 'nameEN' in entry:
            detranslifiedalttitlepenalty = compute_title_penalty(detranslifiedmedianame, entry['nameEN'])
            alttitledetranslified = translit.detranslify(entry['nameEN'])
            reverseddetranslifiedalttitlepenalty = compute_title_penalty(detranslifiedmedianame, alttitledetranslified)
            alttitlepenalty = min(detranslifiedalttitlepenalty, reverseddetranslifiedalttitlepenalty, alttitlepenalty)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate; the penalties computed so far are kept.
        Log('Error computing title penalty for %s', medianame)

    titlepenalty = min(titlepenalty, alttitlepenalty)
    score -= titlepenalty

    if check_main_poster(entry['id']) is True:
        score -= const.SCORE_PENALTY_MAIN_POSTER

    # Give the first result a small boost when its score is not high enough.
    if idx == 0 and score <= 80:
        score += 5

    Log.Debug('### End scoring %s', medianame)
    return min(score, 100)
Exemplo n.º 3
0
 def localTest_parsePosterThumbnailData_None(self):
     """ Verifies translit.detranslify() on two Latin-transliterated strings.

         Each result is UTF-8 encoded and compared with the expected
         Cyrillic text.
     """
     message = 'Wrong translitirated string'

     def check(expected, latinStr):
         # Detranslify the Latin input and compare it with the expectation.
         converted = translit.detranslify(latinStr).encode('utf8')
         self._assertEquals(expected, converted, message)

     check('Операция Ы и другие приключения Шурика',
           'Operatsiya Y i drugie priklyucheniya Shurika')
     check('Д‘Артанян и три мушкетёра[кинокопилка].торрент',
           'D\'Artanyan i tri mushketyora[kinokopilka].torrent')
Exemplo n.º 4
0
def scoreTitle(entry, media, mediaName, idx):
    """Score how well a search result matches a media item.

    Starts at 100 and subtracts an item-order penalty, a year penalty and
    the lowest title penalty found among the Russian/English titles and
    their detranslified variants. The first result gets a small boost when
    its score is 80 or lower. The result is not clamped.

    :param entry: search result mapping; reads ``'year'`` and ``'nameRU'``,
        and optionally ``'nameEN'``.
    :param media: object whose ``year`` attribute is read (falsy when
        unknown).
    :param mediaName: title of the media item being matched.
    :param idx: position of ``entry`` in the search results.
    :return: the computed score.
    """
    score = 100

    # The lower the entry is in the result list, the larger the penalty.
    score -= idx * const.SCORE_PENALTY_ITEM_ORDER

    # Full penalty unless the years are close (see below).
    yearpenalty = const.SCORE_PENALTY_YEAR
    mediayear = int(media.year or 0)
    # Keep only the digits of the entry year; an empty result becomes 0.
    year = int(re.sub('[^0-9]', '', entry['year']) or 0)
    if mediayear != 0 and year != 0:
        yeardiff = abs(mediayear - year)
        if not yeardiff:
            yearpenalty = 0
        elif yeardiff == 1:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 4)
        elif yeardiff == 2:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 3)
    else:
        # If year is unknown, don't penalize the score too much.
        yearpenalty = int(const.SCORE_PENALTY_YEAR / 3)

    score -= yearpenalty

    titlepenalty = computeTitlePenalty(mediaName, entry['nameRU'])

    alttitlepenalty = 100
    if 'nameEN' in entry:
        alttitlepenalty = computeTitlePenalty(mediaName, entry['nameEN'])

    # Best effort: retry the comparisons with the detranslified media name
    # and keep whichever penalty is lowest.
    try:
        detranslifiedmedianame = translit.detranslify(mediaName)
        detranslifiedtitlepenalty = computeTitlePenalty(
            detranslifiedmedianame, entry['nameRU'])
        titlepenalty = min(detranslifiedtitlepenalty, titlepenalty)

        if 'nameEN' in entry:
            detranslifiedalttitlepenalty = computeTitlePenalty(
                detranslifiedmedianame, entry['nameEN'])
            alttitledetranslified = translit.detranslify(entry['nameEN'])
            reverseddetranslifiedalttitlepenalty = computeTitlePenalty(
                detranslifiedmedianame, alttitledetranslified)
            alttitlepenalty = min(detranslifiedalttitlepenalty,
                                  reverseddetranslifiedalttitlepenalty,
                                  alttitlepenalty)
    except Exception:
        # Deliberately ignored: the penalties computed so far remain valid.
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        pass

    titlepenalty = min(titlepenalty, alttitlepenalty)
    score -= titlepenalty

    # Give the first result a small boost when its score is not high enough.
    if idx == 0 and score <= 80:
        score += 5
    return score
Exemplo n.º 5
0
def scoreTitle(entry, media, mediaName, idx):
    """Compute a match score for one search result against a media item.

    The score starts at 100; an item-order penalty, a year penalty and the
    smallest title penalty (over the Russian/English titles and their
    detranslified variants) are subtracted. A first result scoring 80 or
    less receives 5 extra points. No clamping is applied.

    :param entry: search result mapping; reads ``'year'`` and ``'nameRU'``,
        and optionally ``'nameEN'``.
    :param media: object whose ``year`` attribute is read (falsy when
        unknown).
    :param mediaName: title of the media item being matched.
    :param idx: position of ``entry`` in the search results.
    :return: the computed score.
    """
    # Item order penalty: grows with the position in the result list.
    score = 100 - (idx * const.SCORE_PENALTY_ITEM_ORDER)

    # Year penalty: full by default, reduced when the years are close.
    yearpenalty = const.SCORE_PENALTY_YEAR
    mediayear = int(media.year or 0)
    # Strip everything but digits from the entry year; empty becomes 0.
    year = int(re.sub('[^0-9]','', entry['year']) or 0)
    if mediayear != 0 and year != 0:
        yeardiff = abs(mediayear - year)
        if not yeardiff:
            yearpenalty = 0
        elif yeardiff == 1:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 4)
        elif yeardiff == 2:
            yearpenalty = int(const.SCORE_PENALTY_YEAR / 3)
    else:
        # If year is unknown, don't penalize the score too much.
        yearpenalty = int(const.SCORE_PENALTY_YEAR / 3)

    score = score - yearpenalty

    # Title penalties against the Russian and (when present) English title.
    titlepenalty = computeTitlePenalty(mediaName, entry['nameRU'])

    alttitlepenalty = 100
    if 'nameEN' in entry:
        alttitlepenalty = computeTitlePenalty(mediaName, entry['nameEN'])

    # Optional refinement using the detranslified media name; any failure
    # leaves the penalties computed above untouched.
    try:
        detranslifiedmedianame = translit.detranslify(mediaName)
        detranslifiedtitlepenalty = computeTitlePenalty(detranslifiedmedianame, entry['nameRU'])
        titlepenalty = min(detranslifiedtitlepenalty, titlepenalty)

        if 'nameEN' in entry:
            detranslifiedalttitlepenalty = computeTitlePenalty(detranslifiedmedianame, entry['nameEN'])
            alttitledetranslified = translit.detranslify(entry['nameEN'])
            reverseddetranslifiedalttitlepenalty = computeTitlePenalty(detranslifiedmedianame, alttitledetranslified)
            alttitlepenalty = min(detranslifiedalttitlepenalty, reverseddetranslifiedalttitlepenalty, alttitlepenalty)
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit are not silently swallowed.
        pass

    titlepenalty = min(titlepenalty, alttitlepenalty)
    score = score - titlepenalty

    # Small boost for the first result when its score is not high enough.
    if idx == 0 and score <= 80:
        score = score + 5
    return score
Exemplo n.º 6
0
    def fetchAndParseSearchResults(self, mediaName, mediaYear):
        """ Searches for movie titles on KinoPoisk.

        When mediaName is all ASCII, a second query is issued with the
        detranslified name and both result sets are merged, keeping the
        higher-scored entry for every id returned by both queries.

        @param mediaName Movie title parsed from a filename.
        @param mediaYear Movie year parsed from a filename.
        @return Array of tuples: [kinoPoiskId, title, year, score],
                ordered by descending score.
        """
        self.log.Info('Quering kinopoisk...')
        results = self.queryKinoPoisk(mediaName, mediaYear)

        # Check media name is all ASCII characters, and if it is,
        # issue another query to KinoPoisk using a translified media name;
        # lastly, merge the scored results.
        if common.isAsciiString(mediaName):
            translifiedMediaName = translit.detranslify(mediaName)
            moreResults = self.queryKinoPoisk(translifiedMediaName, mediaYear)
            # Index the first query's results by id for constant-time lookup.
            resultsMap = {result[0]: result for result in results}
            # Recreate and repopulate the results array removing duplicates.
            results = []
            for result in moreResults:
                currId = result[0]
                # Test membership on the dict itself: `in resultsMap.keys()`
                # built a new list on every iteration under Python 2.
                if currId in resultsMap:
                    origResult = resultsMap.pop(currId)
                    # On a tie the result of the second query wins.
                    if result[3] >= origResult[3]:
                        results.append(result)
                    else:
                        results.append(origResult)
                else:
                    results.append(result)
            # Whatever is left was returned by the first query only.
            results.extend(resultsMap.viewvalues())

        # Sort all results based on their score (highest first).
        # sort() + reverse() is kept on purpose: sort(reverse=True) would
        # order equally scored results differently.
        results.sort(key=operator.itemgetter(3))
        results.reverse()
        if self.isDebug:
            self.log.Debug('Search produced %d results:' % len(results))
            for index, result in enumerate(results):
                self.log.Debug(
                    ' ... %d: score="%d", id="%s", name="%s", year="%s".' %
                    (index, result[3], result[0], result[1], str(result[2])))
        return results
  def fetchAndParseSearchResults(self, mediaName, mediaYear):
    """ Searches for movie titles on KinoPoisk.

        For an all-ASCII mediaName the search is repeated with the
        detranslified name; results sharing an id are collapsed to the one
        with the higher score.

        @param mediaName Movie title parsed from a filename.
        @param mediaYear Movie year parsed from a filename.
        @return Array of tuples: [kinoPoiskId, title, year, score],
                ordered by descending score.
    """
    self.log.Info('Quering kinopoisk...')
    results = self.queryKinoPoisk(mediaName, mediaYear)

    # Check media name is all ASCII characters, and if it is,
    # issue another query to KinoPoisk using a translified media name;
    # lastly, merge the scored results.
    if common.isAsciiString(mediaName):
      translifiedMediaName = translit.detranslify(mediaName)
      moreResults = self.queryKinoPoisk(translifiedMediaName, mediaYear)
      # Results of the first query, keyed by id.
      resultsMap = {result[0]: result for result in results}
      results = [] # Recreate and repopulate the results array removing duplicates.
      for result in moreResults:
        currId = result[0]
        # Direct dict membership; `in resultsMap.keys()` scanned a freshly
        # built list on every iteration under Python 2.
        if currId in resultsMap:
          origResult = resultsMap.pop(currId)
          # Keep the better scored duplicate; the second query wins ties.
          if result[3] >= origResult[3]:
            results.append(result)
          else:
            results.append(origResult)
        else:
          results.append(result)
      # Entries found by the first query only.
      results.extend(resultsMap.viewvalues())

    # Sort all results based on their score (highest first).
    # sort() followed by reverse() is intentional: sort(reverse=True) would
    # change the relative order of equally scored results.
    results.sort(key=operator.itemgetter(3))
    results.reverse()
    if self.isDebug:
      self.log.Debug('Search produced %d results:' % len(results))
      for index, result in enumerate(results):
        self.log.Debug(' ... %d: id="%s", name="%s", year="%s", score="%d".' %
            (index, result[0], result[1], str(result[2]), result[3]))
    return results
Exemplo n.º 8
0
def scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year, itemIndex):
  """ Compares page and media titles taking into consideration
      media item's year and title values. Returns an int score that is
      nominally in [0, 100] (the value is not clamped here).
      Search item scores 100 when:
        - it's first on the list of results; AND
        - it equals to the media title (ignoring case) OR all media title words are found in the search item; AND
        - search item year equals to media year.

      For now, our title scoring is pretty simple - we check if individual words
      from media item's title are found in the title from search results.
      We should also take into consideration order of words, so that "One Two" would not
      have the same score as "Two One". Also, taking into consideration year difference.

      Arguments:
        mediaName - title of the media item being matched.
        mediaYear - year of the media item (converted with toInteger).
        title     - title of the search result.
        altTitle  - alternative title of the search result, or None.
        year      - year of the search result (converted with toInteger).
        itemIndex - position of the search result in the result list.
  """
  if DEBUG_SCORING:
    Log.Debug('>>>>>>> comparing item %d::: "%s (%s)" with "%s (%s)" alt="%s"...' %
        (itemIndex, str(mediaName), str(mediaYear), str(title), str(year), str(altTitle)))
  # Max score is when both title and year match exactly.
  score = 100

  # Item order penalty (the lower it is on the list or results, the larger the penalty).
  score = score - (itemIndex * SCORE_PENALTY_ITEM_ORDER)

  # Compute year penalty: [equal, diff>=3] --> [0, MAX].
  yearPenalty = SCORE_PENALTY_YEAR
  mediaYear = toInteger(mediaYear)
  year = toInteger(year)
  if mediaYear is not None and year is not None:
    yearDiff = abs(mediaYear - year)
    if not yearDiff:
      yearPenalty = 0
    elif yearDiff == 1:
      yearPenalty = int(SCORE_PENALTY_YEAR / 4)
    elif yearDiff == 2:
      yearPenalty = int(SCORE_PENALTY_YEAR / 3)
  else:
    # If year is unknown, don't penalize the score too much.
    yearPenalty = int(SCORE_PENALTY_YEAR / 3)
  score = score - yearPenalty

  # Compute title penalty.
  titlePenalty = computeTitlePenalty(mediaName, title)
  altTitlePenalty = 100
  if altTitle is not None:
    altTitlePenalty = computeTitlePenalty(mediaName, altTitle)

  # Get detransliterated media name (in case filename is in latin characters),
  # compare its score with the original, and pick the min.
  try:
    detranslifiedMediaName = translit.detranslify(mediaName)
    detranslifiedTitlePenalty = computeTitlePenalty(detranslifiedMediaName, title)
    # Remember the pre-merge value so the debug line reports the real
    # "original" penalty rather than the already merged minimum.
    originalTitlePenalty = titlePenalty
    titlePenalty = min(detranslifiedTitlePenalty, originalTitlePenalty)
    if DEBUG_SCORING:
      Log.Debug('Comparing title penalties: %d (original) and %d (detranslified).' %
              (originalTitlePenalty, detranslifiedTitlePenalty))
    if altTitle is not None:
      detranslifiedAltTitlePenalty = computeTitlePenalty(detranslifiedMediaName, altTitle)
      altTitleDetranslified = translit.detranslify(altTitle)
      reversedDetranslifiedAltTitlePenalty = computeTitlePenalty(
        detranslifiedMediaName, altTitleDetranslified)
      # Same as above: keep the pre-merge value for the debug output.
      originalAltTitlePenalty = altTitlePenalty
      altTitlePenalty = min(detranslifiedAltTitlePenalty, reversedDetranslifiedAltTitlePenalty, originalAltTitlePenalty)
      if DEBUG_SCORING:
        Log.Debug('Comparing alt title penalties: %d (1), %d (2), and %d (3).' %
                  (detranslifiedAltTitlePenalty, reversedDetranslifiedAltTitlePenalty, originalAltTitlePenalty))
  except Exception:
    # Best effort only: keep the penalties computed so far. Exception (not a
    # bare except) lets KeyboardInterrupt and SystemExit propagate.
    if DEBUG_SCORING:
      Log.Debug('Detranslified title comparison failed; keeping the penalties computed so far.')

  titlePenalty = min(titlePenalty, altTitlePenalty)
  if DEBUG_SCORING:
    Log.Debug('Picked the lowest title penalty: %d.' % titlePenalty)
  score = score - titlePenalty

  # If the score is not high enough, add a few points to the first result -
  # let's give KinoPoisk some credit :-).
  if itemIndex == 0 and score <= 80:
    score = score + 5

  # IMPORTANT: always return an int.
  score = int(score)
  if DEBUG_SCORING:
    Log.Debug('***** title scored %d' % score)
  return score
Exemplo n.º 9
0
def scoreMediaTitleMatch(mediaName, mediaYear, title, altTitle, year,
                         itemIndex):
    """ Compares page and media titles taking into consideration
      media item's year and title values. Returns an int score that is
      nominally in [0, 100] (the value is not clamped here).
      Search item scores 100 when:
        - it's first on the list of results; AND
        - it equals to the media title (ignoring case) OR all media title words are found in the search item; AND
        - search item year equals to media year.

      For now, our title scoring is pretty simple - we check if individual words
      from media item's title are found in the title from search results.
      We should also take into consideration order of words, so that "One Two" would not
      have the same score as "Two One". Also, taking into consideration year difference.

      Arguments:
        mediaName - title of the media item being matched.
        mediaYear - year of the media item (converted with toInteger).
        title     - title of the search result.
        altTitle  - alternative title of the search result, or None.
        year      - year of the search result (converted with toInteger).
        itemIndex - position of the search result in the result list.
  """
    if DEBUG_SCORING:
        Log.Debug(
            '>>>>>>> comparing item %d::: "%s (%s)" with "%s (%s)" alt="%s"...'
            % (itemIndex, str(mediaName), str(mediaYear), str(title),
               str(year), str(altTitle)))
    # Max score is when both title and year match exactly.
    score = 100

    # Item order penalty (the lower it is on the list or results, the larger the penalty).
    score = score - (itemIndex * SCORE_PENALTY_ITEM_ORDER)

    # Compute year penalty: [equal, diff>=3] --> [0, MAX].
    yearPenalty = SCORE_PENALTY_YEAR
    mediaYear = toInteger(mediaYear)
    year = toInteger(year)
    if mediaYear is not None and year is not None:
        yearDiff = abs(mediaYear - year)
        if not yearDiff:
            yearPenalty = 0
        elif yearDiff == 1:
            yearPenalty = int(SCORE_PENALTY_YEAR / 4)
        elif yearDiff == 2:
            yearPenalty = int(SCORE_PENALTY_YEAR / 3)
    else:
        # If year is unknown, don't penalize the score too much.
        yearPenalty = int(SCORE_PENALTY_YEAR / 3)
    score = score - yearPenalty

    # Compute title penalty.
    titlePenalty = computeTitlePenalty(mediaName, title)
    altTitlePenalty = 100
    if altTitle is not None:
        altTitlePenalty = computeTitlePenalty(mediaName, altTitle)

    # Get detransliterated media name (in case filename is in latin characters),
    # compare its score with the original, and pick the min.
    try:
        detranslifiedMediaName = translit.detranslify(mediaName)
        detranslifiedTitlePenalty = computeTitlePenalty(
            detranslifiedMediaName, title)
        # Keep the pre-merge value: the debug line below must report the
        # real "original" penalty, not the minimum that replaces it.
        originalTitlePenalty = titlePenalty
        titlePenalty = min(detranslifiedTitlePenalty, originalTitlePenalty)
        if DEBUG_SCORING:
            Log.Debug(
                'Comparing title penalties: %d (original) and %d (detranslified).'
                % (originalTitlePenalty, detranslifiedTitlePenalty))
        if altTitle is not None:
            detranslifiedAltTitlePenalty = computeTitlePenalty(
                detranslifiedMediaName, altTitle)
            altTitleDetranslified = translit.detranslify(altTitle)
            reversedDetranslifiedAltTitlePenalty = computeTitlePenalty(
                detranslifiedMediaName, altTitleDetranslified)
            # Same reasoning: log the value from before the merge.
            originalAltTitlePenalty = altTitlePenalty
            altTitlePenalty = min(detranslifiedAltTitlePenalty,
                                  reversedDetranslifiedAltTitlePenalty,
                                  originalAltTitlePenalty)
            if DEBUG_SCORING:
                Log.Debug(
                    'Comparing alt title penalties: %d (1), %d (2), and %d (3).'
                    % (detranslifiedAltTitlePenalty,
                       reversedDetranslifiedAltTitlePenalty,
                       originalAltTitlePenalty))
    except Exception:
        # Best effort only: keep the penalties computed so far. Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        if DEBUG_SCORING:
            Log.Debug(
                'Detranslified title comparison failed; keeping the penalties computed so far.'
            )

    titlePenalty = min(titlePenalty, altTitlePenalty)
    if DEBUG_SCORING:
        Log.Debug('Picked the lowest title penalty: %d.' % titlePenalty)
    score = score - titlePenalty

    # If the score is not high enough, add a few points to the first result -
    # let's give KinoPoisk some credit :-).
    if itemIndex == 0 and score <= 80:
        score = score + 5

    # IMPORTANT: always return an int.
    score = int(score)
    if DEBUG_SCORING:
        Log.Debug('***** title scored %d' % score)
    return score
Exemplo n.º 10
0
        tables.update({table_name: ['id integer primary key not null']})

pprint(tables)

# Collect the lines of the .dot file that contain '>' (edge candidates).
# The file is read inside a `with` block so the handle is closed right away
# instead of being left open until garbage collection.
with open("digraphg.dot") as dot_file:
    edge_lines = [x.strip() for x in dot_file if x.find('>') != -1]

for line in edge_lines:
    # Only simple "table -> field" edges are handled.
    if line.count('->') == 1:
        # Strip quotes/spaces around both ends and transliterate them.
        table, field = [to_translit(x.strip('"\' ')) for x in line.split('->')]
        if field.count('['):
            # Drop a trailing "[...]" part and leftover '_' / '"' characters.
            field = field.split('[')[0].strip('_"')
        # Every field is registered as a text column of its table.
        tables[table] += ['%s text' % field]

# Append one generated phrase to `text` for every table that has at least one
# column besides its first entry. Each column definition is a string whose
# first whitespace-separated token is the column name.
for table in tables:
    # Convert the transliterated table name back for display.
    such_name = detranslify(table)
    if len(tables[table]) == 2:
        # Exactly one extra column: take its name from the last definition.
        atr_name = tables[table][-1].split()[0]
        atr_name = detranslify(atr_name)
        # atr_1 holds templates with two %s placeholders; presumably `choice`
        # is random.choice - confirm against the imports.
        text += [choice(atr_1) % (such_name, atr_name)]

    elif len(tables[table]) > 2:
        # Several extra columns: skip the first definition and join the
        # remaining column names with '", "'.
        atr_names = '", "'.join(
            [detranslify(x.split()[0]) for x in tables[table][1:]])
        text += [choice(atr_2) % (such_name, atr_names)]

pprint(tables)

# Starts empty; it is not filled anywhere in this section.
one2many = []

for line in [