Пример #1
0
    def process(self, mtree, options=None):
        """
        Label the groups that are still unknown as series name / episode
        title, based on where they sit relative to already known elements.

        Nothing is returned: findings are reported through found_property()
        and, for the final title -> series swap, written on the leaf's guess.
        """
        episode_leaves = [
            leaf for leaf in mtree.leaves() if 'episodeNumber' in leaf.guess
        ]

        if not episode_leaves:
            # without an episode number, two or more leftover groups in the
            # basename most likely read as "series - episode title"
            candidates = self._filter_candidates(
                mtree.node_at((-2, )).unidentified_leaves())
            how_many = len(candidates)

            # a lone candidate is probably the series name too, so the first
            # candidate is tagged as series in both situations
            if how_many >= 1:
                found_property(candidates[0], 'series', confidence=0.4)
            if how_many >= 2:
                found_property(candidates[1], 'title', confidence=0.4)
        else:
            self.match_from_epnum_position(mtree, episode_leaves[0])

        # a single leftover group in the folder holding the file is likely to
        # be the series name
        try:
            folder_leaves = list(mtree.node_at((-3, )).unidentified_leaves())
        except ValueError:
            folder_leaves = []

        if len(folder_leaves) == 1:
            found_property(folder_leaves[0], 'series', confidence=0.3)

        # a path group carrying only the season info is usually preceded by
        # the series title (ie: ../series/season X/..)
        season_only = [
            candidate for candidate in mtree.nodes()
            if 'season' in candidate.guess
            and 'episodeNumber' not in candidate.guess
        ]

        if season_only:
            just_before = [
                leaf for leaf in mtree.unidentified_leaves()
                if leaf.node_idx[0] == season_only[0].node_idx[0] - 1
            ]
            if len(just_before) == 1:
                found_property(just_before[0], 'series', confidence=0.5)

        # a title found without any series name is turned into the series name
        if 'series' in mtree.info or 'title' not in mtree.info:
            return

        leaf = mtree.first_leaf_containing('title')
        title_metadata = leaf.guess.metadata('title')
        title_value = leaf.guess['title']
        del leaf.guess['title']
        leaf.guess.set('series', title_value, metadata=title_metadata)
Пример #2
0
 def process(self, mtree, options=None):
     """
     Hand the unidentified leaves to a GuessFinder built on
     self.guess_properties, then report as 'properCount' (only when non-zero)
     how many leaves have 'Proper' in their 'other' info.
     """
     finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
     finder.process_nodes(mtree.unidentified_leaves())

     proper_count = sum(
         1 for leaf in mtree.leaves_containing('other')
         if 'other' in leaf.info and 'Proper' in leaf.info['other'])

     if proper_count:
         found_property(mtree, 'properCount', proper_count)
Пример #3
0
 def process(self, mtree, options=None):
     """
     Process the unidentified leaves with a GuessFinder, then report the
     number of leaves flagged "Proper" as "properCount" on the tree.

     Nothing is reported when no such leaf exists.
     """
     def is_proper(leaf):
         # the leaf must expose an "other" entry that contains "Proper"
         return "other" in leaf.info and "Proper" in leaf.info["other"]

     GuessFinder(self.guess_properties, 1.0, self.log,
                 options).process_nodes(mtree.unidentified_leaves())

     proper_leaves = [
         leaf for leaf in mtree.leaves_containing("other") if is_proper(leaf)
     ]
     if proper_leaves:
         found_property(mtree, "properCount", len(proper_leaves))
Пример #4
0
 def process(self, mtree, options=None):
     """
     Let a GuessFinder work on the unidentified leaves, then count the leaves
     whose 'other' info holds 'Proper' and report that count as 'properCount'.

     A count of zero is not reported.
     """
     leftover = mtree.unidentified_leaves()
     GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(leftover)

     flagged = filter(
         lambda leaf: 'other' in leaf.info and 'Proper' in leaf.info['other'],
         mtree.leaves_containing('other'))
     proper_count = len(list(flagged))

     if not proper_count:
         return
     found_property(mtree, 'properCount', proper_count)
    def process(self, mtree, options=None):
        """
        Use the position of the unidentified groups relative to known
        elements (episode number or date, season folder) to report series
        names and episode titles.

        Nothing is returned: findings go through found_property() and, for
        the final title -> series swap, are written on the leaf's guess.
        """
        def by_decreasing_confidence(leaf):
            return -leaf.guess.confidence()

        anchors = [leaf for leaf in mtree.leaves()
                   if 'episodeNumber' in leaf.guess]
        if not anchors:
            # no episode number: fall back on leaves carrying a date
            anchors = [leaf for leaf in mtree.leaves() if 'date' in leaf.guess]
        anchors.sort(key=by_decreasing_confidence)

        if anchors:
            self.match_from_epnum_position(mtree, anchors[0], options)
        else:
            # neither episode number nor date: with at least 2 groups in the
            # basename, it is probably "series - eptitle"
            path_groups = [group for group in mtree.nodes()
                           if group.category == 'path']
            basename = path_groups[-2]

            candidates = self._filter_candidates(basename.unidentified_leaves(),
                                                 options)

            if len(candidates) >= 2 and 'series' not in mtree.info:
                found_property(candidates[0], 'series', confidence=0.4)
                found_property(candidates[1], 'title', confidence=0.4)
            elif len(candidates) == 1:
                # a lone candidate is probably the series name, unless a
                # series is already known, in which case it is the title
                found_property(candidates[0],
                               'title' if 'series' in mtree.info else 'series',
                               confidence=0.4)

        # a single leftover group in the folder containing the file is
        # likely to be the series name
        path_groups = [group for group in mtree.nodes()
                       if group.category == 'path']
        try:
            folder_leaves = list(path_groups[-3].unidentified_leaves())
        except IndexError:
            folder_leaves = []

        if len(folder_leaves) == 1:
            found_property(folder_leaves[0], 'series', confidence=0.3)

        # a path group holding only the season info is usually preceded by
        # the series title (ie: ../series/season X/..)
        season_only = [candidate for candidate in mtree.nodes()
                       if 'season' in candidate.guess
                       and 'episodeNumber' not in candidate.guess]

        if season_only:
            just_before = [
                leaf for leaf in mtree.unidentified_leaves()
                if leaf.node_idx[0] == season_only[0].node_idx[0] - 1
            ]
            if len(just_before) == 1:
                found_property(just_before[0], 'series', confidence=0.5)

        # a title found without any series name is turned into the series name
        if 'series' in mtree.info or 'title' not in mtree.info:
            return

        leaf = mtree.first_leaf_containing('title')
        title_metadata = leaf.guess.metadata('title')
        title_value = leaf.guess['title']
        del leaf.guess['title']
        leaf.guess.set('series', title_value, metadata=title_metadata)
Пример #6
0
    def process(self, mtree, options=None):
        """
        Tag the unidentified groups closest to a bonus number, a film number
        or a season leaf.

        - the first unidentified group after a 'bonusNumber' leaf is reported
          as 'bonusTitle' when both share the first two node_idx components
        - the nearest unidentified groups before / after a 'filmNumber' leaf
          are reported as 'filmSeries' / 'title'
        - when the tree has a 'bonusNumber', the nearest unidentified group
          before the first 'season' leaf is reported as 'series' when both
          share the first two node_idx components

        A neighbour that does not exist is skipped instead of being passed on
        as None. Nothing is returned; findings go through found_property().

        :param mtree: match tree whose leaves are inspected and tagged
        :param options: not used by this method
        """
        def previous_group(g):
            # last unidentified leaf whose index is lower than g's, or None.
            # NOTE(review): assumes unidentified_leaves() yields leaves in
            # increasing node_idx order -- confirm
            for leaf in reversed(list(mtree.unidentified_leaves())):
                if leaf.node_idx < g.node_idx:
                    return leaf
            return None

        def next_group(g):
            # first unidentified leaf whose index is greater than g's, or None
            for leaf in mtree.unidentified_leaves():
                if leaf.node_idx > g.node_idx:
                    return leaf
            return None

        def same_group(g1, g2):
            # both nodes share the first two components of their index
            return g1.node_idx[:2] == g2.node_idx[:2]

        bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
        if bonus:
            bonus_title = next_group(bonus[0])
            if bonus_title is not None and same_group(bonus_title, bonus[0]):
                found_property(bonus_title, 'bonusTitle', confidence=0.8)

        film_number = [node for node in mtree.leaves()
                       if 'filmNumber' in node.guess]
        if film_number:
            # either neighbour may be missing (film number first or last in
            # the name): only report the ones that exist
            film_series = previous_group(film_number[0])
            if film_series is not None:
                found_property(film_series, 'filmSeries', confidence=0.9)

            title = next_group(film_number[0])
            if title is not None:
                found_property(title, 'title', confidence=0.9)

        season = [node for node in mtree.leaves() if 'season' in node.guess]
        if season and 'bonusNumber' in mtree.info:
            series = previous_group(season[0])
            # no unidentified group before the season leaf -> nothing to tag
            if series is not None and same_group(series, season[0]):
                found_property(series, 'series', confidence=0.9)
    def process(self, mtree, options=None):
        """
        Try to identify the remaining unknown groups by looking at their
        position relative to other known elements.

        Three heuristics run in sequence, each reporting through
        found_property(); nothing is returned:

        1. around the first leaf holding an 'episodeNumber' (delegated to
           match_from_epnum_position), or, without one, "series - title"
           taken from the unidentified groups of node (-2,)
        2. a single unidentified group in node (-3,) is reported as series
        3. a single unidentified group in the path group just before a node
           holding 'season' but no 'episodeNumber' is reported as series

        :param mtree: match tree to inspect and tag
        :param options: not used by this method
        """
        eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
        if eps:
            self.match_from_epnum_position(mtree, eps[0])

        else:
            # if we don't have the episode number, but at least 2 groups in the
            # basename, then it's probably series - eptitle
            basename = mtree.node_at((-2,))

            title_candidates = self._filter_candidates(basename.unidentified_leaves())

            if len(title_candidates) >= 2:
                found_property(title_candidates[0], 'series', confidence=0.4)
                found_property(title_candidates[1], 'title', confidence=0.4)
            elif len(title_candidates) == 1:
                # but if there's only one candidate, it's probably the series name
                found_property(title_candidates[0], 'series', confidence=0.4)

        # if we only have 1 remaining valid group in the folder containing the
        # file, then it's likely that it is the series name
        try:
            # materialise the leaves: len() and indexing below need a real
            # sequence, which unidentified_leaves() is not if it is a generator
            series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
        except ValueError:
            series_candidates = []

        if len(series_candidates) == 1:
            found_property(series_candidates[0], 'series', confidence=0.3)

        # if there's a path group that only contains the season info, then the
        # previous one is most likely the series title (ie: ../series/season X/..)
        eps = [node for node in mtree.nodes()
               if 'season' in node.guess and 'episodeNumber' not in node.guess]

        if eps:
            previous = [node for node in mtree.unidentified_leaves()
                        if node.node_idx[0] == eps[0].node_idx[0] - 1]
            if len(previous) == 1:
                found_property(previous[0], 'series', confidence=0.5)
Пример #8
0
    def match_from_epnum_position(self, mtree, node):
        """
        Tag the unidentified leaves around the episode number leaf ``node``
        as series name and/or episode title, depending on their position.

        Findings are reported through found_property(); nothing is returned.
        """
        epnum_idx = node.node_idx

        def leftover_leaves(matches):
            # unidentified leaves whose node_idx satisfies ``matches``
            return [
                leaf for leaf in mtree.unidentified_leaves()
                if matches(leaf.node_idx)
            ]

        # high-level selectors built on top of the generic filter above
        def before_in_pathgroup():
            return leftover_leaves(
                lambda idx: idx[0] == epnum_idx[0] and idx[1:] < epnum_idx[1:])

        def after_in_pathgroup():
            return leftover_leaves(
                lambda idx: idx[0] == epnum_idx[0] and idx[1:] > epnum_idx[1:])

        def after_in_explicitgroup():
            return leftover_leaves(
                lambda idx: idx[:2] == epnum_idx[:2] and idx[2:] > epnum_idx[2:])

        # the episode number comes first and exactly 2 groups follow it in
        # the same path group -> "series title - episode title"
        following = self._filter_candidates(after_in_pathgroup())

        if ('title' not in mtree.info
                and not before_in_pathgroup()
                and len(following) == 2):
            found_property(following[0], 'series', confidence=0.4)
            found_property(following[1], 'title', confidence=0.4)
            return

        # any group in front of the episode number is probably the series
        # name; the first one is used
        preceding = before_in_pathgroup()
        if preceding:
            found_property(preceding[0], 'series', confidence=0.7)

        # exactly 1 group behind (same path group): probably the episode title
        following = self._filter_candidates(after_in_pathgroup())
        if len(following) == 1:
            found_property(following[0], 'title', confidence=0.5)
            return

        # otherwise look in the same explicit group, with a lower confidence
        # (lower still when several groups compete)
        following = self._filter_candidates(after_in_explicitgroup())
        competitors = len(following)
        if competitors >= 1:
            found_property(following[0], 'title',
                           confidence=0.4 if competitors == 1 else 0.3)
            return

        # last resort: the candidate with the longest value (first one wins
        # on ties)
        following = self._filter_candidates(after_in_pathgroup())
        if following:
            longest = max(following,
                          key=lambda candidate: len(candidate.clean_value))
            found_property(longest, 'title', confidence=0.3)
    def process(self, mtree, options=None):
        """
        Find the movie title among the groups left unidentified, using their
        position in the basename and in the containing folder.

        Does nothing when the tree already has a 'title'. Otherwise the
        heuristics below are tried in order and the method returns as soon as
        one of them reports a title through found_property() (the film number
        case can also return without reporting anything). Nothing is returned.

        :param mtree: match tree to inspect and tag
        :param options: not used by this method
        """
        if 'title' in mtree.info:
            return

        path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes()))

        # second-to-last 'path' node is used as the basename, the one before
        # it (when present) as the containing folder
        basename = path_nodes[-2]
        # keep every leaf whose cleaned value is not empty
        all_valid = lambda leaf: len(leaf.clean_value) > 0
        basename_leftover = list(basename.unidentified_leaves(valid=all_valid))

        try:
            folder = path_nodes[-3]
            folder_leftover = list(folder.unidentified_leaves())
        except IndexError:
            # fewer than 3 path nodes: there is no containing folder
            folder = None
            folder_leftover = []

        self.log.debug('folder: %s' % u(folder_leftover))
        self.log.debug('basename: %s' % u(basename_leftover))

        # specific cases:
        # if we find the same group both in the folder name and the filename,
        # it's a good candidate for title
        if (folder_leftover and basename_leftover and
                        folder_leftover[0].clean_value == basename_leftover[0].clean_value and
                        not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):
            found_property(folder_leftover[0], 'title', confidence=0.8)
            return

        # specific cases:
        # if the basename contains a number first followed by an unidentified
        # group, and the folder only contains 1 unidentified one, then we have
        # a series
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        if len(folder_leftover) > 0 and len(basename_leftover) > 1:
            series = folder_leftover[0]
            film_number = basename_leftover[0]
            title = basename_leftover[1]

            basename_leaves = list(basename.leaves())

            num = None
            try:
                num = int(film_number.clean_value)
            except ValueError:
                # first leftover group is not a number: not a film series
                pass

            # note: a parsed value of 0 is falsy and is treated as "no number"
            if num:
                self.log.debug('series: %s' % series.clean_value)
                self.log.debug('title: %s' % title.clean_value)
                if (series.clean_value != title.clean_value and
                            series.clean_value != film_number.clean_value and
                            basename_leaves.index(film_number) == 0 and
                            basename_leaves.index(title) == 1 and
                            not GuessMovieTitleFromPosition.excluded_word(title, series)):

                    found_property(title, 'title', confidence=0.6)
                    found_property(series, 'filmSeries', confidence=0.6)
                    found_property(film_number, 'filmNumber', num, confidence=0.6)
                # returns whenever a number was parsed, even if the checks
                # above failed and nothing was reported
                return

        # if the folder contains a year, the first leaf returned by
        # previous_unidentified_leaves() for that year is taken as the title
        if folder:
            year_group = folder.first_leaf_containing('year')
            if year_group:
                groups_before = folder.previous_unidentified_leaves(year_group)
                # NOTE(review): groups_before is consumed with next() below, so
                # it is presumably an iterator and this test always true -- confirm
                if groups_before:
                    try:
                        node = next(groups_before)
                        if not GuessMovieTitleFromPosition.excluded_word(node):
                            found_property(node, 'title', confidence=0.8)
                            return
                    except StopIteration:
                        # nothing unidentified before the year
                        pass

        # if we have either format or videoCodec in the folder containing the
        # file or one of its parents, then we should probably look for the title
        # in there rather than in the basename
        try:
            props = list(mtree.previous_leaves_containing(mtree.children[-2],
                                                          ['videoCodec',
                                                           'format',
                                                           'language']))
        except IndexError:
            props = []

        if props:
            group_idx = props[0].node_idx[0]
            if all(g.node_idx[0] == group_idx for g in props):
                # if they're all in the same group, take leftover info from there
                leftover = mtree.node_at((group_idx,)).unidentified_leaves()
                try:
                    # only the first leftover leaf of that group is considered
                    node = next(leftover)
                    if not GuessMovieTitleFromPosition.excluded_word(node):
                        found_property(node, 'title', confidence=0.7)
                        return
                except StopIteration:
                    pass

        # look for title in basename if there are some remaining unidentified
        # groups there
        if basename_leftover:
            # if basename is only one word and the containing folder has at least
            # 3 words in it, we should take the title from the folder name
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (basename_leftover[0].clean_value.count(' ') == 0 and
                    folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2 and
                    not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])):

                found_property(folder_leftover[0], 'title', confidence=0.7)
                return

            # if the first unidentified group is explicit, take the first
            # group which is not inside brackets or parentheses.
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if basename_leftover[0].is_explicit():
                for basename_leftover_elt in basename_leftover:
                    if not basename_leftover_elt.is_explicit() and not GuessMovieTitleFromPosition.excluded_word(basename_leftover_elt):
                        found_property(basename_leftover_elt, 'title', confidence=0.8)
                        return

            # if all else fails, take the first remaining unidentified group in the
            # basename as title
            if not GuessMovieTitleFromPosition.excluded_word(basename_leftover[0]):
                found_property(basename_leftover[0], 'title', confidence=0.6)
                return

        # if there are no leftover groups in the basename, look in the folder name
        if folder_leftover and not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0]):
            found_property(folder_leftover[0], 'title', confidence=0.5)
            return

        # if nothing worked, look if we have a very small group at the beginning
        # of the basename (every leaf is accepted by the validity filter here)
        basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
        try:
            node = next(basename_leftover)
            if not GuessMovieTitleFromPosition.excluded_word(node):
                found_property(node, 'title', confidence=0.4)
                return
        except StopIteration:
            # no unidentified leaf at all in the basename
            pass
Пример #10
0
    def match_from_epnum_position(self, mtree, node):
        """
        Report series name and episode title among the unidentified leaves,
        based on where they stand relative to the episode number leaf ``node``.

        Everything goes through found_property(); nothing is returned.
        """
        epnum_idx = node.node_idx

        # predicates expressing the position of a leaf relative to the
        # episode number
        def is_before(leaf):
            return (leaf.node_idx[0] == epnum_idx[0]
                    and leaf.node_idx[1:] < epnum_idx[1:])

        def is_after(leaf):
            return (leaf.node_idx[0] == epnum_idx[0]
                    and leaf.node_idx[1:] > epnum_idx[1:])

        def is_after_in_explicitgroup(leaf):
            return (leaf.node_idx[:2] == epnum_idx[:2]
                    and leaf.node_idx[2:] > epnum_idx[2:])

        def unidentified(where):
            return list(filter(where, mtree.unidentified_leaves()))

        # episode number first, followed by exactly 2 groups in the same path
        # group -> series title - episode title
        title_candidates = self._filter_candidates(unidentified(is_after))

        if 'title' not in mtree.info:
            if not unidentified(is_before) and len(title_candidates) == 2:
                found_property(title_candidates[0], 'series', confidence=0.4)
                found_property(title_candidates[1], 'title', confidence=0.4)
                return

        # a group located before the episode number is probably the series name
        series_candidates = unidentified(is_before)
        if series_candidates:
            found_property(series_candidates[0], 'series', confidence=0.7)

        # a single group after it (same path group) is probably the episode
        # title
        title_candidates = self._filter_candidates(unidentified(is_after))
        if len(title_candidates) == 1:
            found_property(title_candidates[0], 'title', confidence=0.5)
            return

        # same search restricted to the explicit group, with lower confidence
        title_candidates = self._filter_candidates(
            unidentified(is_after_in_explicitgroup))
        if len(title_candidates) == 1:
            found_property(title_candidates[0], 'title', confidence=0.4)
            return
        if len(title_candidates) > 1:
            found_property(title_candidates[0], 'title', confidence=0.3)
            return

        # fall back on the candidate with the longest value (the earliest one
        # when several share the maximum length)
        title_candidates = self._filter_candidates(unidentified(is_after))
        if not title_candidates:
            return

        lengths = [len(candidate.clean_value) for candidate in title_candidates]
        longest = title_candidates[lengths.index(max(lengths))]
        found_property(longest, 'title', confidence=0.3)
Пример #11
0
    def process(self, mtree, options=None):
        """
        Report series names and episode titles among the unidentified groups,
        from their position relative to known elements (episode number or
        date, containing folder, season folder).

        Nothing is returned: findings go through found_property() and, for
        the final title -> series swap, are written on the leaf's guess.
        """
        def by_decreasing_confidence(leaf):
            return -leaf.guess.confidence()

        def path_groups():
            return [group for group in mtree.nodes()
                    if group.category == 'path']

        anchors = [
            leaf for leaf in mtree.leaves() if 'episodeNumber' in leaf.guess
        ]
        if not anchors:
            # no episode number: fall back on leaves carrying a date
            anchors = [leaf for leaf in mtree.leaves() if 'date' in leaf.guess]
        anchors.sort(key=by_decreasing_confidence)

        if anchors:
            handled = []
            for anchor in anchors:
                # only the first episode leaf of each path node is processed;
                # a leaf with no 'path' ancestor is attached to the root
                path_ancestors = [
                    ancestor for ancestor in anchor.ancestors
                    if ancestor.category == 'path'
                ]
                owner = path_ancestors[0] if path_ancestors else anchor.root
                if owner in handled:
                    continue
                self.match_from_epnum_position(owner, anchor, options)
                handled.append(owner)
        else:
            # neither episode number nor date: with at least 2 groups in the
            # basename, it is probably "series - eptitle"
            basename = path_groups()[-2]

            candidates = GuessEpisodeInfoFromPosition._filter_candidates(
                basename.unidentified_leaves(), options)

            if len(candidates) >= 2 and 'series' not in mtree.info:
                found_property(candidates[0], 'series', confidence=0.4)
                found_property(candidates[1], 'title', confidence=0.4)
            elif len(candidates) == 1:
                # a lone candidate is probably the series name, unless a
                # series is already known, in which case it is the title
                found_property(
                    candidates[0],
                    'title' if 'series' in mtree.info else 'series',
                    confidence=0.4)

        # a single leftover group in the folder containing the file is
        # likely to be the series name
        all_path_groups = path_groups()
        try:
            folder_leaves = list(all_path_groups[-3].unidentified_leaves())
        except IndexError:
            folder_leaves = []

        if len(folder_leaves) == 1:
            if not GuessEpisodeInfoFromPosition.excluded_word(folder_leaves[0]):
                found_property(folder_leaves[0], 'series', confidence=0.3)

        # a path group holding only the season info is usually preceded by
        # the series title (ie: ../series/season X/..)
        season_only = [
            candidate for candidate in mtree.nodes()
            if 'season' in candidate.guess
            and 'episodeNumber' not in candidate.guess
        ]

        if season_only:
            just_before = [
                leaf for leaf in mtree.unidentified_leaves()
                if leaf.node_idx[0] == season_only[0].node_idx[0] - 1
            ]
            if len(just_before) == 1:
                if not GuessEpisodeInfoFromPosition.excluded_word(
                        just_before[0]):
                    found_property(just_before[0], 'series', confidence=0.5)

        # a title found without any series name is turned into the series name
        if 'series' in mtree.info or 'title' not in mtree.info:
            return

        leaf = mtree.first_leaf_containing('title')
        title_metadata = leaf.guess.metadata('title')
        title_value = leaf.guess['title']
        del leaf.guess['title']
        leaf.guess.set('series', title_value, metadata=title_metadata)
Пример #12
0
    def process(self, mtree, options=None):
        """
        Find the movie title among the groups left unidentified, using their
        position in the basename (node (-2,)) and in the containing folder
        (node (-3,)).

        The heuristics below are tried in order; the method returns as soon
        as one of them applies. Findings are reported through
        found_property(); nothing is returned.

        :param mtree: match tree to inspect and tag
        :param options: not used by this method
        """
        basename = mtree.node_at((-2,))
        # keep every leaf whose cleaned value is not empty
        all_valid = lambda leaf: len(leaf.clean_value) > 0
        basename_leftover = basename.unidentified_leaves(valid=all_valid)

        try:
            folder = mtree.node_at((-3,))
            folder_leftover = folder.unidentified_leaves()
        except ValueError:
            # presumably raised by node_at() when there is no such node, i.e.
            # no containing folder -- confirm
            folder = None
            folder_leftover = []

        self.log.debug('folder: %s' % u(folder_leftover))
        self.log.debug('basename: %s' % u(basename_leftover))

        # specific cases:
        # if we find the same group both in the folder name and the filename,
        # it's a good candidate for title
        if (folder_leftover and basename_leftover and
            folder_leftover[0].clean_value == basename_leftover[0].clean_value):

            found_property(folder_leftover[0], 'title', confidence=0.8)
            return

        # specific cases:
        # if the basename contains a number first followed by an unidentified
        # group, and the folder only contains 1 unidentified one, then we have
        # a series
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        #
        # NOTE: any exception raised in the block below (missing leftover
        # group, non-numeric first group, ...) simply skips this heuristic
        try:
            series = folder_leftover[0]
            filmNumber = basename_leftover[0]
            title = basename_leftover[1]

            basename_leaves = basename.leaves()

            num = int(filmNumber.clean_value)

            self.log.debug('series: %s' % series.clean_value)
            self.log.debug('title: %s' % title.clean_value)
            if (series.clean_value != title.clean_value and
                series.clean_value != filmNumber.clean_value and
                basename_leaves.index(filmNumber) == 0 and
                basename_leaves.index(title) == 1):

                found_property(title, 'title', confidence=0.6)
                found_property(series, 'filmSeries', confidence=0.6)
                found_property(filmNumber, 'filmNumber', num, confidence=0.6)
            # reached whenever no exception was raised above, even if the
            # checks failed and nothing was reported
            return
        except Exception:
            pass

        # specific cases:
        #  - movies/tttttt (yyyy)/tttttt.ccc
        #
        # NOTE: as above, any exception (no such node, no year, no group
        # before the year, ...) silently skips this heuristic
        try:
            if mtree.node_at((-4, 0)).value.lower() == 'movies':
                folder = mtree.node_at((-3,))

                # Note:too generic, might solve all the unittests as they all
                # contain 'movies' in their path
                #
                # if containing_folder.is_leaf() and not containing_folder.guess:
                #    containing_folder.guess =
                #        Guess({ 'title': clean_string(containing_folder.value) },
                #              confidence=0.7)

                year_group = folder.first_leaf_containing('year')
                groups_before = folder.previous_unidentified_leaves(year_group)

                found_property(groups_before[0], 'title', confidence=0.8)
                return

        except Exception:
            pass

        # if we have either format or videoCodec in the folder containing the file
        # or one of its parents, then we should probably look for the title in
        # there rather than in the basename
        try:
            props = mtree.previous_leaves_containing(mtree.children[-2],
                                                     ['videoCodec', 'format',
                                                       'language'])
        except IndexError:
            props = []

        if props:
            group_idx = props[0].node_idx[0]
            if all(g.node_idx[0] == group_idx for g in props):
                # if they're all in the same group, take leftover info from there
                leftover = mtree.node_at((group_idx,)).unidentified_leaves()

                if leftover:
                    found_property(leftover[0], 'title', confidence=0.7)
                    return

        # look for title in basename if there are some remaining unidentified
        # groups there
        if basename_leftover:
            # if basename is only one word and the containing folder has at least
            # 3 words in it, we should take the title from the folder name
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (basename_leftover[0].clean_value.count(' ') == 0 and
                folder_leftover and
                folder_leftover[0].clean_value.count(' ') >= 2):

                found_property(folder_leftover[0], 'title', confidence=0.7)
                return

            # if the first unidentified group is explicit, take the first
            # group which is not inside brackets or parentheses.
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if basename_leftover[0].is_explicit():
                for basename_leftover_elt in basename_leftover:
                    if not basename_leftover_elt.is_explicit():
                        found_property(basename_leftover_elt, 'title', confidence=0.8)
                        return

            # if all else fails, take the first remaining unidentified group in the
            # basename as title
            found_property(basename_leftover[0], 'title', confidence=0.6)
            return

        # if there are no leftover groups in the basename, look in the folder name
        if folder_leftover:
            found_property(folder_leftover[0], 'title', confidence=0.5)
            return

        # if nothing worked, look if we have a very small group at the beginning
        # of the basename (every leaf is accepted by the validity filter here)
        basename = mtree.node_at((-2,))
        basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
        if basename_leftover:
            found_property(basename_leftover[0], 'title', confidence=0.4)
            return
Пример #13
0
    def process(self, mtree, options=None):
        """Guess the movie title from the position of the unidentified groups.

        Inspects the leaves that are still unidentified in the basename
        (second-to-last path node) and in the folder containing it
        (third-to-last path node, when there is one) and reports the most
        plausible one as ``title`` through ``found_property``. Heuristics are
        tried from the most specific to the most generic; the first one that
        matches ends the search. Nothing is done when ``mtree.info`` already
        contains a title.

        :param mtree: tree whose path nodes and leaves are inspected.
        :param options: not used by this method.
        :return: None; guesses are reported through ``found_property``.
        """
        if 'title' in mtree.info:
            return

        excluded = GuessMovieTitleFromPosition.excluded_word

        def non_empty(leaf):
            return len(leaf.clean_value) > 0

        path_nodes = [node for node in mtree.nodes()
                      if node.category == 'path']

        basename = path_nodes[-2]
        basename_leftover = list(basename.unidentified_leaves(valid=non_empty))

        # only the index lookup is guarded: a missing folder is expected, any
        # other failure should not be mistaken for it
        try:
            folder = path_nodes[-3]
        except IndexError:
            folder = None
            folder_leftover = []
        else:
            folder_leftover = list(folder.unidentified_leaves())

        self.log.debug('folder: %s', u(folder_leftover))
        self.log.debug('basename: %s', u(basename_leftover))

        # specific case:
        # the same group shows up first both in the folder name and in the
        # filename, which makes it a good candidate for the title
        if (folder_leftover and basename_leftover
                and folder_leftover[0].clean_value
                == basename_leftover[0].clean_value
                and not excluded(folder_leftover[0])):
            found_property(folder_leftover[0], 'title', confidence=0.8)
            return

        # specific case:
        # the basename starts with a number followed by an unidentified group
        # and the folder has a leftover group: this looks like a film series
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        if folder_leftover and len(basename_leftover) > 1:
            series = folder_leftover[0]
            film_number, title = basename_leftover[:2]

            try:
                num = int(film_number.clean_value)
            except ValueError:
                num = None

            # a film number of 0 is falsy and is therefore not handled here
            if num:
                basename_leaves = list(basename.leaves())

                self.log.debug('series: %s', series.clean_value)
                self.log.debug('title: %s', title.clean_value)
                if (series.clean_value != title.clean_value
                        and series.clean_value != film_number.clean_value
                        and basename_leaves.index(film_number) == 0
                        and basename_leaves.index(title) == 1
                        and not excluded(title, series)):

                    found_property(title, 'title', confidence=0.6)
                    found_property(series, 'filmSeries', confidence=0.6)
                    found_property(film_number,
                                   'filmNumber',
                                   num,
                                   confidence=0.6)
                # NOTE(review): we stop here as soon as the basename starts
                # with a non-zero number, even when the checks above rejected
                # the series pattern, so no later heuristic runs in that case.
                # Kept as-is; confirm this is intended.
                return

        # the unidentified group right before the year in the folder name
        year_group = folder.first_leaf_containing('year') if folder else None
        if year_group:
            groups_before = folder.previous_unidentified_leaves(year_group)
            node = next(groups_before, None) if groups_before else None
            if node is not None and not excluded(node):
                found_property(node, 'title', confidence=0.8)
                return

        # if we have either format, videoCodec or language in the folder
        # containing the file or one of its parents, then we should probably
        # look for the title in there rather than in the basename
        try:
            props = list(
                mtree.previous_leaves_containing(
                    mtree.children[-2], ['videoCodec', 'format', 'language']))
        except IndexError:
            props = []

        if props:
            group_idx = props[0].node_idx[0]
            if all(g.node_idx[0] == group_idx for g in props):
                # they are all in the same group: take leftover info from there
                leftover = mtree.node_at((group_idx, )).unidentified_leaves()
                node = next(leftover, None)
                if node is not None and not excluded(node):
                    found_property(node, 'title', confidence=0.7)
                    return

        # look for the title in the basename if there are some remaining
        # unidentified groups there
        if basename_leftover:
            first_leftover = basename_leftover[0]

            # if the basename is only one word and the containing folder has
            # at least 3 words in it, take the title from the folder name
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (first_leftover.clean_value.count(' ') == 0
                    and folder_leftover
                    and folder_leftover[0].clean_value.count(' ') >= 2
                    and not excluded(folder_leftover[0])):

                found_property(folder_leftover[0], 'title', confidence=0.7)
                return

            # if the first leftover group is explicit (inside brackets or
            # parentheses), prefer the first one that is not
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if first_leftover.is_explicit():
                for candidate in basename_leftover:
                    if not candidate.is_explicit() and not excluded(candidate):
                        found_property(candidate, 'title', confidence=0.8)
                        return

            # if all else fails, take the first remaining unidentified group
            # in the basename as title
            if not excluded(first_leftover):
                found_property(first_leftover, 'title', confidence=0.6)
                return

        # nothing usable in the basename: look in the folder name
        if folder_leftover and not excluded(folder_leftover[0]):
            found_property(folder_leftover[0], 'title', confidence=0.5)
            return

        # if nothing worked, accept any unidentified group of the basename,
        # including the ones rejected by the non-empty filter above
        node = next(basename.unidentified_leaves(valid=lambda leaf: True),
                    None)
        if node is not None and not excluded(node):
            found_property(node, 'title', confidence=0.4)
Пример #14
0
    def process(self, mtree, options=None):
        """
        guess the movie title from where the still-unknown groups sit in the
        file name and in its parent folder; heuristics go from the most
        specific to the most generic and the first one that matches ends the
        search (results are reported through found_property)
        """
        def has_text(leaf):
            return len(leaf.clean_value) > 0

        file_node = mtree.node_at((-2, ))
        file_groups = file_node.unidentified_leaves(valid=has_text)

        try:
            dir_groups = mtree.node_at((-3, )).unidentified_leaves()
        except ValueError:
            # no usable node at index -3: behave as if the folder had no
            # leftover groups
            dir_groups = []

        self.log.debug('folder: %s' % u(dir_groups))
        self.log.debug('basename: %s' % u(file_groups))

        # special case: folder and file name both start with the very same
        # unknown group, which is then a strong title candidate
        if dir_groups and file_groups:
            if dir_groups[0].clean_value == file_groups[0].clean_value:
                found_property(dir_groups[0], 'title', confidence=0.8)
                return

        # special case: film series, i.e. the file name opens with a number
        # directly followed by an unknown group while the folder holds the
        # series name
        # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
        #
        # The blanket except is what drives the control flow here: a missing
        # group, a first group that is not a number or a failed index lookup
        # all mean "not this pattern" and we move on to the next heuristic.
        # When nothing raises we stop, whether or not the checks passed.
        try:
            series_leaf = dir_groups[0]
            number_leaf = file_groups[0]
            title_leaf = file_groups[1]

            all_leaves = file_node.leaves()

            film_number = int(number_leaf.clean_value)

            self.log.debug('series: %s' % series_leaf.clean_value)
            self.log.debug('title: %s' % title_leaf.clean_value)

            names_differ = (
                series_leaf.clean_value != title_leaf.clean_value
                and series_leaf.clean_value != number_leaf.clean_value)
            if (names_differ
                    and all_leaves.index(number_leaf) == 0
                    and all_leaves.index(title_leaf) == 1):
                found_property(title_leaf, 'title', confidence=0.6)
                found_property(series_leaf, 'filmSeries', confidence=0.6)
                found_property(number_leaf,
                               'filmNumber',
                               film_number,
                               confidence=0.6)
            return
        except Exception:
            pass

        # special case: movies/tttttt (yyyy)/tttttt.ccc
        # the title is the first unknown group found before the year in the
        # folder name; any failure along the way just skips this heuristic
        #
        # Note: tagging the whole containing folder as the title whenever it
        # is an unguessed leaf was considered too generic (the unit tests all
        # have 'movies' in their path), so only the group before the year is
        # used
        try:
            top_name = mtree.node_at((-4, 0)).value.lower()
            if top_name == 'movies':
                parent = mtree.node_at((-3, ))
                year_leaf = parent.first_leaf_containing('year')
                before_year = parent.previous_unidentified_leaves(year_leaf)

                found_property(before_year[0], 'title', confidence=0.8)
                return

        except Exception:
            pass

        # when format, videoCodec or language was found in the folder holding
        # the file (or one of its parents), the title is more likely to be
        # next to it than in the file name
        try:
            tagged = mtree.previous_leaves_containing(
                mtree.children[-2], ['videoCodec', 'format', 'language'])
        except IndexError:
            tagged = []

        if tagged:
            first_group = tagged[0].node_idx[0]
            if all(leaf.node_idx[0] == first_group for leaf in tagged):
                # every hit lives in the same path group: use what is left
                # unidentified in that group
                remaining = mtree.node_at((first_group, )).unidentified_leaves()

                if remaining:
                    found_property(remaining[0], 'title', confidence=0.7)
                    return

        # unknown groups remain in the file name: pick the title among them
        if file_groups:
            head = file_groups[0]

            # a one-word file name next to a folder name of 3 words or more:
            # the folder name is the better title
            # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
            # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
            if (head.clean_value.count(' ') == 0
                    and dir_groups
                    and dir_groups[0].clean_value.count(' ') >= 2):

                found_property(dir_groups[0], 'title', confidence=0.7)
                return

            # the first group is explicit (inside brackets or parentheses):
            # prefer the first group that is not
            # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
            if head.is_explicit():
                for group in file_groups:
                    if group.is_explicit():
                        continue
                    found_property(group, 'title', confidence=0.8)
                    return

            # last resort for the file name: its first unknown group
            found_property(head, 'title', confidence=0.6)
            return

        # nothing left in the file name, fall back on the folder name
        if dir_groups:
            found_property(dir_groups[0], 'title', confidence=0.5)
            return

        # still nothing: accept any unknown group of the file name, even the
        # ones the non-empty filter rejected at the top
        file_node = mtree.node_at((-2, ))
        any_groups = file_node.unidentified_leaves(valid=lambda leaf: True)
        if any_groups:
            found_property(any_groups[0], 'title', confidence=0.4)