Exemplo n.º 1
0
 def test_file_extension_on_path_and_location_10(self):
     """
     Check that a name without any dot yields an empty extension, both as
     a bare path and once joined to the test data directory.
     """
     base_dir = self.get_test_loc('fileutils/basename')
     path = 'tst'
     expected = ''

     # bare relative path
     assert expected == fileutils.file_extension(path)

     # same path as a location under the test directory
     location = os.path.join(base_dir, path)
     assert expected == fileutils.file_extension(location)
Exemplo n.º 2
0
def get_file_infos(location):
    """
    Collect details about the file or directory at `location` and return
    them as a list holding a single ordered mapping.

    Values that only apply to files are None (or '' for the extension) when
    `location` is not a file, and also when the collected value is falsy.
    `files_count` is only collected for directories.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    ctype = contenttype.get_type(location)
    a_file = ctype.is_file
    a_dir = ctype.is_dir

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = (
        (fileutils.file_extension(location) or '') if a_file else '')
    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if a_file else None)
    infos['size'] = ctype.size
    infos['sha1'] = (sha1(location) or None) if a_file else None
    infos['md5'] = (md5(location) or None) if a_file else None
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if a_dir else None)
    infos['mime_type'] = (ctype.mimetype_file or None) if a_file else None
    infos['file_type'] = (ctype.filetype_file or None) if a_file else None
    infos['programming_language'] = (
        (ctype.programming_language or None) if a_file else None)
    # the flags below are None, not False, whenever they are falsy
    infos['is_binary'] = (ctype.is_binary or None) if a_file else None
    infos['is_text'] = (ctype.is_text or None) if a_file else None
    infos['is_archive'] = (ctype.is_archive or None) if a_file else None
    infos['is_media'] = (ctype.is_media or None) if a_file else None
    infos['is_source'] = (ctype.is_source or None) if a_file else None
    infos['is_script'] = (ctype.is_script or None) if a_file else None
    return [infos]
Exemplo n.º 3
0
def from_name(filename):
    """
    Parse an RPM file name and return an (E, N, V, R, A) tuple of epoch,
    name, version, release and arch, or None if the name cannot be parsed.

    The name is expected to look like name-version-release.arch, with or
    without a trailing .rpm or .srpm extension. The epoch is taken from an
    "e:" prefix of the version segment and returned as an int; it is None
    when there is no such prefix. The arch of a .srpm is always 'src'.
    """
    nevra_pattern = re.compile(r"^(.*)-([^-]*)-([^-]*)\.([^.]*)$")

    extension = fileutils.file_extension(filename) or None
    if extension in ('.rpm', '.srpm'):
        # drop the package extension before parsing
        filename = filename[:-len(extension)]

    matched = nevra_pattern.match(filename)
    if not matched:
        return None

    name, version, release, arch = matched.groups()
    if extension == '.srpm':
        arch = 'src'

    epoch = None
    if ':' in version:
        epoch_text, version = version.split(':', 1)
        epoch = int(epoch_text)
    return epoch, name, version, release, arch
Exemplo n.º 4
0
def is_special_legal_file(location):
    """
    Tell whether the file at `location` may be a "special" legal-like file,
    looking only at its base name and extension.

    Return 'yes' when a name from `special_names` occurs as-is in the base
    name or extension, or when the lowercased base name or extension equals
    a name from `special_names_lower`. Return 'maybe' when a name from
    `special_names_lower` merely occurs inside the lowercased base name or
    extension. Return None otherwise.
    """
    base = fileutils.file_base_name(location)
    base_lower = base.lower()
    ext = fileutils.file_extension(location)
    ext_lower = ext.lower()

    # case-sensitive containment
    for special in special_names:
        if special in base or special in ext:
            return 'yes'

    # case-insensitive exact match
    for special_lower in special_names_lower:
        if special_lower in (base_lower, ext_lower):
            return 'yes'

    # case-insensitive containment is only a weak hint
    for special_lower in special_names_lower:
        if special_lower in base_lower or special_lower in ext_lower:
            return 'maybe'

    # return False for now?
    return None
Exemplo n.º 5
0
def get_pygments_lexer(location):
    """
    Return a Pygments lexer suitable for the content of the file at
    `location`, or None for binary files and when no lexer is found.
    """
    # Prefer the registry entry for the binary check; fall back to checking
    # the file itself when the location is not in the registry.
    try:
        binary = _registry[location].is_binary
    except KeyError:
        binary = is_binary(location)
    if binary:
        return None

    # Lexer lookup order:
    #  1. the file name as-is
    #  2. the lowercased file name, since detection is case-skewed
    #     otherwise (e.g. .java vs .JAVA)
    #  3. the start of the file content, only when there is no extension
    try:
        return get_lexer_for_filename(location)
    except LexerClassNotFound:
        pass

    try:
        return get_lexer_for_filename(location.lower())
    except LexerClassNotFound:
        pass

    if fileutils.file_extension(location):
        # a file with an extension never gets content-based detection
        return None

    try:
        # if Pygments cannot guess, give up rather than carry forward
        return guess_lexer(get_text_file_start(location))
    except LexerClassNotFound:
        return None
Exemplo n.º 6
0
def get_file_infos(location):
    """
    Gather information about the file or directory at `location` and return
    it as a one-item list containing an ordered mapping.

    File-specific entries are None (or '' for the extension) when `location`
    is not a file or when the gathered value is falsy. `files_count` is only
    gathered for directories.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    detected = contenttype.get_type(location)
    for_file = detected.is_file
    for_dir = detected.is_dir

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)

    if for_file:
        infos['extension'] = fileutils.file_extension(location) or ''
        infos['date'] = filetype.get_last_modified_date(location) or None
    else:
        infos['extension'] = ''
        infos['date'] = None

    infos['size'] = detected.size

    if for_file:
        infos['sha1'] = sha1(location) or None
        infos['md5'] = md5(location) or None
    else:
        infos['sha1'] = None
        infos['md5'] = None

    if for_dir:
        infos['files_count'] = filetype.get_file_count(location) or None
    else:
        infos['files_count'] = None

    if for_file:
        infos['mime_type'] = detected.mimetype_file or None
        infos['file_type'] = detected.filetype_file or None
        infos['programming_language'] = (
            detected.programming_language or None)
        # falsy flags are reported as None rather than False
        infos['is_binary'] = detected.is_binary or None
        infos['is_text'] = detected.is_text or None
        infos['is_archive'] = detected.is_archive or None
        infos['is_media'] = detected.is_media or None
        infos['is_source'] = detected.is_source or None
        infos['is_script'] = detected.is_script or None
    else:
        infos['mime_type'] = None
        infos['file_type'] = None
        infos['programming_language'] = None
        infos['is_binary'] = None
        infos['is_text'] = None
        infos['is_archive'] = None
        infos['is_media'] = None
        infos['is_source'] = None
        infos['is_script'] = None
    return [infos]
Exemplo n.º 7
0
    def is_c_source(self):
        """
        Return True if this is a text file and the lowercased extension of
        its location is one of the extensions listed below.
        """
        known_extensions = {
            '.c', '.cc', '.cp', '.cpp', '.cxx', '.c++',
            '.h', '.hh', '.hpp', '.hxx', '.h++',
            '.s', '.asm', '.i', '.ii', '.m',
        }

        extension = fileutils.file_extension(self.location)
        if self.is_text is not True:
            return False
        return extension.lower() in known_extensions
Exemplo n.º 8
0
 def test_file_extension_on_path_and_location(self):
     """
     Check the extension returned for a set of file and directory paths,
     both as bare paths and as locations under a copy of the test data.
     """
     base_dir = self.get_test_loc("fileutils/basename", copy=True)
     # (path, expected extension) pairs
     expectations = [
         ("a/.a/file", ""),
         ("a/.a/", ""),
         ("a/b/.a.b", ".b"),
         ("a/b/a.tag.gz", ".gz"),
         ("a/b/", ""),
         ("a/f.a", ".a"),
         ("a/", ""),
         ("f.a/a.c", ".c"),
         ("f.a/", ""),
         ("tst", ""),
     ]
     for path, expected in expectations:
         assert expected == fileutils.file_extension(path)
         # also test on location
         location = os.path.join(base_dir, path)
         assert expected == fileutils.file_extension(location)
    def is_c_source(self):
        """
        Return True if this is a text file and the lowercased extension of
        its location is one of the extensions listed below, False otherwise.
        """
        known_extensions = {
            '.c', '.cc', '.cp', '.cpp', '.cxx', '.c++', '.h', '.hh',
            '.s', '.asm', '.hpp', '.hxx', '.h++', '.i', '.ii', '.m',
        }
        if on_linux and py2:
            # NOTE(review): presumably converts the extensions to byte
            # strings to match byte paths on Linux with Python 2 -- confirm
            # against the as_bytes helper.
            known_extensions = set(as_bytes(known_extensions))

        extension = fileutils.file_extension(self.location)
        return self.is_text is True and extension.lower() in known_extensions
Exemplo n.º 10
0
def get_resource_info(location):
    """
    Build a mapping of values usable to create a new CodebaseResource for
    the path at `location`.

    The mapping always has "type", "name" and "extension". A symlink also
    gets a "status" of "symlink" and nothing else. For other paths, the
    non-empty values returned by `scancode_api.get_file_info` are merged in,
    except for the fields the CodebaseResource model does not have.
    """
    path = Path(location)
    symlink = path.is_symlink()
    regular_file = path.is_file()

    resource_info = {}
    if symlink:
        resource_type = CodebaseResource.Type.SYMLINK
        resource_info["status"] = "symlink"
    elif regular_file:
        resource_type = CodebaseResource.Type.FILE
    else:
        resource_type = CodebaseResource.Type.DIRECTORY

    base_name = fileutils.file_base_name(location)
    extension = fileutils.file_extension(location)
    resource_info["type"] = resource_type
    resource_info["name"] = base_name
    resource_info["extension"] = extension

    if symlink:
        return resource_info

    # Fields returned by `get_file_info` that are missing on the
    # CodebaseResource model.
    skipped_fields = (
        "is_binary",
        "is_text",
        "is_archive",
        "is_media",
        "is_source",
        "is_script",
        "date",
    )

    scanned = scancode_api.get_file_info(location)
    for field_name, value in scanned.items():
        # Empty values are skipped to avoid null vs. '' conflicts.
        if field_name not in skipped_fields and value:
            resource_info[field_name] = value

    return resource_info
Exemplo n.º 11
0
 def test_file_extension_on_path_and_location(self):
     """
     Check the extension returned for several file and directory paths,
     first as bare paths and then as locations under a copy of the test
     data directory.
     """
     base_dir = self.get_test_loc('fileutils/basename', copy=True)
     # (path, expected extension) pairs
     cases = (
         ('a/.a/file', ''),
         ('a/.a/', ''),
         ('a/b/.a.b', '.b'),
         ('a/b/a.tag.gz', '.gz'),
         ('a/b/', ''),
         ('a/f.a', '.a'),
         ('a/', ''),
         ('f.a/a.c', '.c'),
         ('f.a/', ''),
         ('tst', ''),
     )
     for path, expected in cases:
         from_path = fileutils.file_extension(path)
         assert expected == from_path
         # also test on location
         location = os.path.join(base_dir, path)
         from_location = fileutils.file_extension(location)
         assert expected == from_location
Exemplo n.º 12
0
def is_special_legal_file(location):
    """
    Return an indication that the file at `location` may be a "special"
    legal-like file, based only on its base name and extension.

    Return 'yes' when the lowercased base name or extension equals a name
    from `special_names_lower`, or when a name from `special_names` occurs
    in the base name or extension as-is. Return 'maybe' when a name from
    `special_names_lower` occurs inside the lowercased base name or
    extension. Return None otherwise.
    """
    base_name = fileutils.file_base_name(location)
    extension = fileutils.file_extension(location)
    base_name_lower = base_name.lower()
    extension_lower = extension.lower()

    if any(name in (base_name_lower, extension_lower)
           for name in special_names_lower):
        return 'yes'

    # The case-sensitive containment check must run against the names as
    # found: checked against lowercased names it can never match a special
    # name that contains an uppercase letter.
    # NOTE(review): assumes `special_names` holds the original-case names
    # and `special_names_lower` their lowercased forms -- confirm.
    if any(name in base_name or name in extension
           for name in special_names):
        return 'yes'

    if any(name in base_name_lower or name in extension_lower
           for name in special_names_lower):
        return 'maybe'

    # return False for now?
    return None
    def check_get_extractors(self, test_file, expected, kinds=()):
        """
        Assert that the extractors found for the test file `test_file` equal
        `expected`. When `kinds` is not empty it is passed on to
        `archive.get_extractors` as its second argument.
        """
        from extractcode import archive

        test_loc = self.get_test_loc(test_file)
        lookup_args = (test_loc, kinds) if kinds else (test_loc,)
        extractors = archive.get_extractors(*lookup_args)

        # import typecode
        # ft = 'TODO' or typecode.contenttype.get_type(test_loc).filetype_file
        # mt = 'TODO' or typecode.contenttype.get_type(test_loc).mimetype_file

        # values reported in the assertion failure message
        details = {
            'expected': expected,
            'extractors': extractors,
            'test_file': test_file,
            'fe': fileutils.file_extension(test_loc).lower(),
            'em': ', '.join(e.__module__ + '.' + e.__name__
                            for e in extractors),
        }
        msg = ('%(expected)r == %(extractors)r for %(test_file)s\n'
               'with fe:%(fe)r, em:%(em)s' % details)
        assert expected == extractors, msg
Exemplo n.º 14
0
def new_name(location, is_dir=False):
    """
    Return a location derived from `location` whose name does not collide,
    ignoring case, with any existing entry of the same parent directory.
    It can be used to write a file or create a directory without
    overwriting anything. Case is ignored so that results are similar on
    case sensitive (*nix) and case insensitive file systems.

    When the name is already taken, a unique name is built by:
     * padding a directory name with _X where X is an incremented number.
     * padding a file base name with _X where X is an incremented number,
       keeping the extension unchanged.
    """
    assert location

    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()

    # windows bare drive path as in c: or z:
    bare_drive = len(name) == 2 and name.endswith(':')
    if not name or name == '.' or bare_drive:
        name = 'file'

    parent = fileutils.parent_directory(location)
    # lowercased names of everything already present in the parent
    taken = {entry.lower() for entry in os.listdir(parent)}

    if name.lower() not in taken:
        return posixpath.join(parent, name)

    extension = fileutils.file_extension(name)
    stem = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension: pad the whole name
        extension, stem = '', name

    counter = 1
    candidate = stem + '_' + str(counter) + extension
    while candidate.lower() in taken:
        counter += 1
        candidate = stem + '_' + str(counter) + extension
    return os.path.join(parent, candidate)
Exemplo n.º 15
0
def new_name(location, is_dir=False):
    """
    Compute a location, based on `location`, that does not clash with any
    existing file or directory of the same parent directory when names are
    compared ignoring case. Ignoring case keeps results similar across case
    sensitive (*nix) and case insensitive file systems.

    If the name is already in use, a unique one is found by:
     * appending _X to a directory name, X being an incremented number.
     * appending _X to a file base name, X being an incremented number,
       and keeping the extension unchanged.
    """
    assert location

    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()

    if not name or name == '.':
        name = 'file'
    elif len(name) == 2 and name.endswith(':'):
        # windows bare drive path as in c: or z:
        name = 'file'

    parent = fileutils.parent_directory(location)
    # every existing sibling name, lowercased
    existing_lower = set()
    for sibling in os.listdir(parent):
        existing_lower.add(sibling.lower())

    if name.lower() not in existing_lower:
        return posixpath.join(parent, name)

    suffix = fileutils.file_extension(name)
    prefix = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        suffix = ''
        prefix = name

    number = 1
    unique_name = prefix + '_' + str(number) + suffix
    while unique_name.lower() in existing_lower:
        number += 1
        unique_name = prefix + '_' + str(number) + suffix
    return os.path.join(parent, unique_name)
Exemplo n.º 16
0
def is_special_legal_file(location):
    """
    Return an indication that the file at `location` may be a "special"
    legal-like file, based only on its base name and extension.

    Return 'yes' when the lowercased base name or extension equals a name
    from `special_names_lower`, or when a name from `special_names` occurs
    in the base name or extension as-is. Return 'maybe' when a name from
    `special_names_lower` occurs inside the lowercased base name or
    extension. Return None otherwise.
    """
    base_name = fileutils.file_base_name(location)
    extension = fileutils.file_extension(location)
    base_name_lower = base_name.lower()
    extension_lower = extension.lower()

    if any(name in (base_name_lower, extension_lower)
           for name in special_names_lower):
        return 'yes'

    # The case-sensitive containment check must run against the names as
    # found: checked against lowercased names it can never match a special
    # name that contains an uppercase letter.
    # NOTE(review): assumes `special_names` holds the original-case names
    # and `special_names_lower` their lowercased forms -- confirm.
    if any(name in base_name or name in extension
           for name in special_names):
        return 'yes'

    if any(name in base_name_lower or name in extension_lower
           for name in special_names_lower):
        return 'maybe'

    # return False for now?
    return None
Exemplo n.º 17
0
def get_file_infos(location, as_list=True):
    """
    Collect details about the file or directory at `location` in an ordered
    mapping. Return that mapping wrapped in a one-item list when `as_list`
    is true, or the mapping itself otherwise.

    The is_* flags are always booleans and are False for anything that is
    not a file. Other file-only values are None (or '' for the extension)
    when `location` is not a file or when the collected value is falsy.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    # not referenced below; the import is kept as it was
    from scancode import utils
    from typecode import contenttype

    infos = OrderedDict()
    a_file = filetype.is_file(location)
    a_dir = filetype.is_dir(location)

    ctype = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = (
        (fileutils.file_extension(location) or '') if a_file else '')
    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if a_file else None)
    infos['size'] = ctype.size

    checksum_names = ('sha1', 'md5',)
    infos.update(multi_checksums(location, checksum_names))

    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if a_dir else None)
    infos['mime_type'] = (ctype.mimetype_file or None) if a_file else None
    infos['file_type'] = (ctype.filetype_file or None) if a_file else None
    infos['programming_language'] = (
        (ctype.programming_language or None) if a_file else None)
    infos['is_binary'] = bool(ctype.is_binary) if a_file else False
    infos['is_text'] = bool(ctype.is_text) if a_file else False
    infos['is_archive'] = bool(ctype.is_archive) if a_file else False
    infos['is_media'] = bool(ctype.is_media) if a_file else False
    infos['is_source'] = bool(ctype.is_source) if a_file else False
    infos['is_script'] = bool(ctype.is_script) if a_file else False

    return [infos] if as_list else infos
Exemplo n.º 18
0
def is_special_legal_file(location):
    """
    Return an indication that the file at `location` may be a "special"
    legal-like file, based only on its base name and extension.

    Return 'yes' when the lowercased base name or extension equals a name
    from `special_names_lower`, or when a name from `special_names` occurs
    in the base name or extension as-is. Return 'maybe' when a name from
    `special_names_lower` occurs inside the lowercased base name or
    extension. Return None otherwise.
    """
    base_name = fileutils.file_base_name(location)
    extension = fileutils.file_extension(location)
    base_name_lower = base_name.lower()
    extension_lower = extension.lower()

    if any(name in (base_name_lower, extension_lower)
           for name in special_names_lower):
        return 'yes'

    # The case-sensitive containment check must run against the names as
    # found: checked against lowercased names it can never match a special
    # name that contains an uppercase letter.
    # NOTE(review): assumes `special_names` holds the original-case names
    # and `special_names_lower` their lowercased forms -- confirm.
    if any(name in base_name or name in extension
           for name in special_names):
        return 'yes'

    if any(name in base_name_lower or name in extension_lower
           for name in special_names_lower):
        return 'maybe'

    # return False for now?
    return None
Exemplo n.º 19
0
def from_name(filename):
    """
    Parse an RPM file name and return an (E, N, V, R, A) tuple of epoch,
    name, version, release and arch, or None if the name cannot be parsed.

    The name is expected to look like name-version-release.arch, with or
    without a trailing .rpm or .srpm extension. The epoch is taken from an
    "e:" prefix of the version segment and returned as an int; it is None
    when there is no such prefix. The arch of a .srpm is always 'src'.

    Raise a ValueError if the epoch prefix is not an integer.
    """
    # raw string: "\." in a regular string literal is an invalid escape
    nevra_pattern = re.compile(r"^(.*)-([^-]*)-([^-]*)\.([^.]*)$")

    file_ext = fileutils.file_extension(filename) or None
    # Extensions carry their leading dot: a bare 'srpm' entry would never
    # match and a .srpm extension would be left in the name being parsed.
    if file_ext in ('.rpm', '.srpm'):
        filename = filename[:-len(file_ext)]

    matched = nevra_pattern.match(filename)
    if not matched:
        return None

    n, v, r, a = matched.groups()
    if file_ext == '.srpm':
        a = 'src'

    if ':' not in v:
        return None, n, v, r, a

    e, v = v.split(':', 1)
    return int(e), n, v, r, a
Exemplo n.º 20
0
def is_special_legal_file(location):
    """
    Return an indication that the file at `location` may be a "special"
    legal-like file, based only on its base name and extension.

    Return 'yes' when the lowercased base name or extension equals a name
    from `special_names_lower`, or when a name from `special_names` occurs
    in the base name or extension as-is. Return 'maybe' when a name from
    `special_names_lower` occurs inside the lowercased base name or
    extension. Return None otherwise.
    """
    base_name = fileutils.file_base_name(location)
    extension = fileutils.file_extension(location)
    base_name_lower = base_name.lower()
    extension_lower = extension.lower()

    if any(name in (base_name_lower, extension_lower)
           for name in special_names_lower):
        return 'yes'

    # The case-sensitive containment check must run against the names as
    # found: checked against lowercased names it can never match a special
    # name that contains an uppercase letter.
    # NOTE(review): assumes `special_names` holds the original-case names
    # and `special_names_lower` their lowercased forms -- confirm.
    if any(name in base_name or name in extension
           for name in special_names):
        return 'yes'

    if any(name in base_name_lower or name in extension_lower
           for name in special_names_lower):
        return 'maybe'

    # return False for now?
    return None
Exemplo n.º 21
0
def is_markup(location):
    """
    Return True if the extension of the file at `location` is found in
    `extensions`, which is defined outside of this function.
    """
    extension = fileutils.file_extension(location)
    return extension in extensions
Exemplo n.º 22
0
 def is_c_source(self):
     """
     Return True if this is a text file and the lowercased extension of
     its location is in C_EXTENSIONS, False otherwise.
     """
     extension = fileutils.file_extension(self.location)
     return self.is_text is True and extension.lower() in C_EXTENSIONS
Exemplo n.º 23
0
 def is_c_source(self):
     """
     Tell whether this is C source: True only for a text file whose
     location has a lowercased extension found in C_EXTENSIONS.
     """
     extension = fileutils.file_extension(self.location)
     if self.is_text is not True:
         return False
     return extension.lower() in C_EXTENSIONS