def process(mtree):
    """Guess 'series' and 'title' for still-unidentified groups by position.

    The heuristics below are applied in order; each one tags a leaf through
    ``found_property`` with a fixed confidence:

    * if a leaf already carries an ``episodeNumber``, defer to
      ``match_from_epnum_position`` for the first such leaf;
    * otherwise look at the node at index ``(-2,)``: the first usable
      unidentified leaf is tagged as series, the second (if any) as title;
    * a lone unidentified leaf in the node at index ``(-3,)`` is tagged as
      series;
    * if a node has a season but no episode number, a lone unidentified leaf
      in the path group just before it is tagged as series;
    * finally, series values listed in ``unlikely_series`` get their
      confidence halved.
    """
    episode_leaves = [
        leaf for leaf in mtree.leaves() if 'episodeNumber' in leaf.guess
    ]

    if episode_leaves:
        match_from_epnum_position(mtree, episode_leaves[0])
    else:
        # no episode number: with at least 2 usable groups in the basename
        # it is probably "series - episode title"; with a single one it is
        # probably just the series name
        basename = mtree.node_at((-2, ))
        usable = []
        for leaf in basename.unidentified_leaves():
            if leaf.clean_value.lower() in non_episode_title:
                continue
            usable.append(leaf)

        if usable:
            found_property(usable[0], 'series', 0.4)
            if len(usable) >= 2:
                found_property(usable[1], 'title', 0.4)

    # a single remaining group in the folder containing the file is likely
    # the series name; node_at signals a missing folder with ValueError
    try:
        folder_leaves = mtree.node_at((-3, )).unidentified_leaves()
    except ValueError:
        folder_leaves = []

    if len(folder_leaves) == 1:
        found_property(folder_leaves[0], 'series', 0.3)

    # a path group holding only the season info is usually preceded by the
    # series title (ie: ../series/season X/..)
    season_only = [
        candidate for candidate in mtree.nodes()
        if 'season' in candidate.guess
        and 'episodeNumber' not in candidate.guess
    ]

    if season_only:
        anchor = season_only[0]
        preceding = [
            leaf for leaf in mtree.unidentified_leaves()
            if leaf.node_idx[0] == anchor.node_idx[0] - 1
        ]
        if len(preceding) == 1:
            found_property(preceding[0], 'series', 0.5)

    # halve the confidence of series names known to be unlikely
    for candidate in mtree.nodes():
        guess = candidate.guess
        if 'series' not in guess:
            continue
        if guess['series'].lower() not in unlikely_series:
            continue
        guess.set_confidence('series', guess.confidence('series') * 0.5)
Пример #2
0
def process(mtree):
    """Tag bonus titles, film series/titles and series from their neighbours.

    Looks at the unidentified leaves surrounding already-identified
    ``bonusNumber``, ``filmNumber`` and ``season`` leaves and tags them via
    ``found_property``:

    * the leaf right after a bonus number, if in the same group, becomes the
      ``bonusTitle`` (0.8);
    * the leaves right before / after a film number become ``filmSeries`` and
      ``title`` (0.9 each);
    * when the tree info has a ``bonusNumber``, the leaf right before the
      season, if in the same group, becomes the ``series`` (0.9).

    A missing neighbour (no unidentified leaf before/after the anchor) is
    skipped instead of being passed on as ``None``.
    """
    def previous_group(g):
        # closest unidentified leaf located before g, or None
        for leaf in reversed(mtree.unidentified_leaves()):
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        # closest unidentified leaf located after g, or None
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        # two nodes share a group when their first two index levels match
        return g1.node_idx[:2] == g2.node_idx[:2]

    bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
    if bonus:
        bonus_title = next_group(bonus[0])
        if bonus_title is not None and same_group(bonus_title, bonus[0]):
            found_property(bonus_title, 'bonusTitle', 0.8)

    film_number = [node for node in mtree.leaves()
                   if 'filmNumber' in node.guess]
    if film_number:
        film_series = previous_group(film_number[0])
        if film_series is not None:
            found_property(film_series, 'filmSeries', 0.9)

        title = next_group(film_number[0])
        if title is not None:
            found_property(title, 'title', 0.9)

    season = [node for node in mtree.leaves() if 'season' in node.guess]
    if season and 'bonusNumber' in mtree.info:
        series = previous_group(season[0])
        if series is not None and same_group(series, season[0]):
            found_property(series, 'series', 0.9)
    def process(self, mtree):
        """
        Identify the remaining unknown groups from their position relative
        to elements that are already known.

        With an episode number available the work is delegated to
        ``self.match_from_epnum_position``; otherwise the basename node
        (index ``(-2,)``) is inspected. Afterwards the containing folder
        (index ``(-3,)``) and any season-only path group are used to guess
        the series, and series values listed in ``unlikely_series`` have
        their confidence halved.
        """
        with_epnum = [leaf for leaf in mtree.leaves()
                      if 'episodeNumber' in leaf.guess]

        if with_epnum:
            self.match_from_epnum_position(mtree, with_epnum[0])
        else:
            # without an episode number, 2 or more usable groups in the
            # basename probably mean "series - eptitle"; a single usable
            # group is probably the series name on its own
            basename = mtree.node_at((-2,))
            usable = []
            for leaf in basename.unidentified_leaves():
                if leaf.clean_value.lower() not in non_episode_title:
                    usable.append(leaf)

            if usable:
                found_property(usable[0], 'series', 0.4)
                if len(usable) >= 2:
                    found_property(usable[1], 'title', 0.4)

        # exactly 1 remaining group in the folder containing the file is
        # likely to be the series name
        try:
            in_folder = mtree.node_at((-3,)).unidentified_leaves()
        except ValueError:
            in_folder = []

        if len(in_folder) == 1:
            found_property(in_folder[0], 'series', 0.3)

        # a path group with only the season info is most likely preceded by
        # the series title (ie: ../series/season X/..)
        season_nodes = [candidate for candidate in mtree.nodes()
                        if 'season' in candidate.guess
                        and 'episodeNumber' not in candidate.guess]

        if season_nodes:
            first_season = season_nodes[0]
            just_before = [leaf for leaf in mtree.unidentified_leaves()
                           if leaf.node_idx[0] == first_season.node_idx[0] - 1]
            if len(just_before) == 1:
                found_property(just_before[0], 'series', 0.5)

        # unlikely series names keep only half of their confidence
        for candidate in mtree.nodes():
            guess = candidate.guess
            if 'series' in guess and guess['series'].lower() in unlikely_series:
                guess.set_confidence('series',
                                     guess.confidence('series') * 0.5)
Пример #4
0
def process(mtree):
    """Use positions around bonus/film/season leaves to tag their neighbours.

    For the first leaf carrying each of ``bonusNumber``, ``filmNumber`` and
    ``season``, the nearest unidentified leaf before or after it is tagged
    through ``found_property``:

    * after a bonus number, same group -> ``bonusTitle`` (confidence 0.8);
    * before a film number -> ``filmSeries``, after it -> ``title`` (0.9);
    * before a season, same group, only when ``bonusNumber`` is in
      ``mtree.info`` -> ``series`` (0.9).

    When no neighbouring unidentified leaf exists the corresponding step is
    skipped, rather than handing ``None`` to ``same_group`` or
    ``found_property``.
    """
    def previous_group(g):
        # walk the unidentified leaves backwards; None when nothing precedes g
        for leaf in mtree.unidentified_leaves()[::-1]:
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        # first unidentified leaf positioned after g; None when there is none
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        # compare the first two levels of the node indices
        return g1.node_idx[:2] == g2.node_idx[:2]

    bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
    if bonus:
        bonusTitle = next_group(bonus[0])
        if bonusTitle is not None and same_group(bonusTitle, bonus[0]):
            found_property(bonusTitle, 'bonusTitle', 0.8)

    filmNumber = [
        node for node in mtree.leaves() if 'filmNumber' in node.guess
    ]
    if filmNumber:
        filmSeries = previous_group(filmNumber[0])
        if filmSeries is not None:
            found_property(filmSeries, 'filmSeries', 0.9)

        title = next_group(filmNumber[0])
        if title is not None:
            found_property(title, 'title', 0.9)

    season = [node for node in mtree.leaves() if 'season' in node.guess]
    if season and 'bonusNumber' in mtree.info:
        series = previous_group(season[0])
        if series is not None and same_group(series, season[0]):
            found_property(series, 'series', 0.9)
    def match_from_epnum_position(self, mtree, node):
        """
        Guess the series and episode title from the unidentified leaves
        located around ``node``, the leaf holding the episode number.

        Leaves whose lowercased ``clean_value`` is in ``non_episode_title``
        are never considered as title candidates. Results are recorded
        through ``found_property``; nothing is returned.
        """
        ep_idx = node.node_idx

        # small helpers so the rules below read at a higher level
        def unidentified_where(predicate):
            return [leaf for leaf in mtree.unidentified_leaves()
                    if predicate(leaf.node_idx)]

        def before_in_pathgroup():
            return unidentified_where(
                lambda idx: idx[0] == ep_idx[0] and idx[1:] < ep_idx[1:])

        def after_in_pathgroup():
            return unidentified_where(
                lambda idx: idx[0] == ep_idx[0] and idx[1:] > ep_idx[1:])

        def after_in_explicitgroup():
            return unidentified_where(
                lambda idx: idx[:2] == ep_idx[:2] and idx[2:] > ep_idx[2:])

        def keep_titles(leaves):
            return [leaf for leaf in leaves
                    if leaf.clean_value.lower() not in non_episode_title]

        # the episode number comes first and exactly 2 groups follow it in
        # the same path group -> series title - episode title
        after = keep_titles(after_in_pathgroup())
        no_title_yet = 'title' not in mtree.info
        if no_title_yet and not before_in_pathgroup() and len(after) == 2:
            series_leaf, title_leaf = after
            found_property(series_leaf, 'series', confidence=0.4)
            found_property(title_leaf, 'title', confidence=0.4)
            return

        # any valid group before the episode number is probably the series
        before = before_in_pathgroup()
        if before:
            found_property(before[0], 'series', confidence=0.7)

        # a single group after it (same path group) is probably the title
        after = keep_titles(after_in_pathgroup())
        if len(after) == 1:
            found_property(after[0], 'title', confidence=0.5)
            return

        # otherwise retry within the same explicit group, less confidently
        in_group = keep_titles(after_in_explicitgroup())
        if in_group:
            confidence = 0.4 if len(in_group) == 1 else 0.3
            found_property(in_group[0], 'title', confidence=confidence)
            return

        # last resort: the candidate with the longest value (first one wins
        # on ties)
        after = keep_titles(after_in_pathgroup())
        if after:
            longest = max(after, key=lambda leaf: len(leaf.clean_value))
            found_property(longest, 'title', confidence=0.3)
def match_from_epnum_position(mtree, node):
    """Tag series / episode title from leaves around the episode number.

    ``node`` is the leaf holding the episode number. Rules, in order:

    1. no title known yet, nothing before the episode number and exactly two
       usable leaves after it in the same path group: they are the series
       and the title (0.4 each);
    2. otherwise the first leaf before it (same path group) is the series
       (0.7);
    3. a single usable leaf after it in the same path group is the title
       (0.5);
    4. else the first usable leaf after it in the same explicit group is the
       title (0.4 if it is the only one, 0.3 otherwise);
    5. else the usable leaf after it with the longest ``clean_value`` is the
       title (0.3).

    "Usable" means the lowercased ``clean_value`` is not in
    ``non_episode_title``. Results go through ``found_property``.
    """
    ep_idx = node.node_idx

    # helpers giving the filters below high-level names
    def leaves_before():
        found = []
        for leaf in mtree.unidentified_leaves():
            idx = leaf.node_idx
            if idx[0] == ep_idx[0] and idx[1:] < ep_idx[1:]:
                found.append(leaf)
        return found

    def leaves_after():
        found = []
        for leaf in mtree.unidentified_leaves():
            idx = leaf.node_idx
            if idx[0] == ep_idx[0] and idx[1:] > ep_idx[1:]:
                found.append(leaf)
        return found

    def leaves_after_in_explicit_group():
        found = []
        for leaf in mtree.unidentified_leaves():
            idx = leaf.node_idx
            if idx[:2] == ep_idx[:2] and idx[2:] > ep_idx[2:]:
                found.append(leaf)
        return found

    def usable(leaves):
        return [leaf for leaf in leaves
                if leaf.clean_value.lower() not in non_episode_title]

    # rule 1: epnumber first, then exactly 2 groups in the same path group
    following = usable(leaves_after())
    if ('title' not in mtree.info
            and not leaves_before()
            and len(following) == 2):
        found_property(following[0], 'series', confidence=0.4)
        found_property(following[1], 'title', confidence=0.4)
        return

    # rule 2: at least 1 valid group before the episode number
    preceding = leaves_before()
    if preceding:
        found_property(preceding[0], 'series', confidence=0.7)

    # rule 3: only 1 group after, in the same path group
    following = usable(leaves_after())
    if len(following) == 1:
        found_property(following[0], 'title', confidence=0.5)
        return

    # rule 4: same explicit group, with lower confidence
    grouped = usable(leaves_after_in_explicit_group())
    if len(grouped) == 1:
        found_property(grouped[0], 'title', confidence=0.4)
        return
    if grouped:
        found_property(grouped[0], 'title', confidence=0.3)
        return

    # rule 5: the longest remaining candidate (earliest one on ties)
    following = usable(leaves_after())
    if following:
        longest = max(following, key=lambda leaf: len(leaf.clean_value))
        found_property(longest, 'title', confidence=0.3)