Пример #1
0
    def matched(self):
        """Return a single guess that contains all the info found in the
        nodes of this tree, trying to merge properties as good as possible.
        """
        if not getattr(self, '_matched_result', None):
            # we need to make a copy here, as the merge functions work in place and
            # calling them on the match tree would modify it
            parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]

            result = smart_merge(parts)

            log.debug('Final result: ' + result.nice_string())
            self._matched_result = result

        return self._matched_result
Пример #2
0
    def matched(self):
        """Return a single guess that contains all the info found in the
        nodes of this tree, trying to merge properties as good as possible.
        """
        if not getattr(self, '_matched_result', None):
            # we need to make a copy here, as the merge functions work in place and
            # calling them on the match tree would modify it
            parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]

            result = smart_merge(parts)

            log.debug('Final result: ' + result.nice_string())
            self._matched_result = result

        for leaf in self.unidentified_leaves():
            if 'unidentified' not in self._matched_result:
                self._matched_result['unidentified'] = []
            self._matched_result['unidentified'].append(leaf.clean_value)

        return self._matched_result
Пример #3
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except AttributeError:
                log.warning(
                    'Could not compute %s hash because it is not available from python\'s hashlib module'
                    % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = dict(hashers).values()

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result
Пример #4
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        args = shlex.split(options)
        options = vars(get_opts().parse_args(args))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = dict(hashers).values()

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = smart_merge(result)

    return result