def test_compiled_python_1(self):
    """Check detection of the byte-compiled Python files in compiled.zip."""
    extracted = self.extract_test_zip(
        'contenttype/compiled/python/compiled.zip')
    # Either value is accepted; the second one carries a "libmagic 5.39"
    # annotation.
    accepted_mimes = (
        'application/octet-stream',
        'text/x-bytecode.python',
    )

    def check_python_25_bytecode(name):
        # Full set of checks applied to command.pyc and extract.pyc.
        location = os.path.join(extracted, name)
        assert get_filetype(location) == 'python 2.5 byte-compiled'
        assert not is_source(location)
        assert not is_text(location)
        assert get_mimetype_file(location) in accepted_mimes
        assert get_filetype_pygment(location) == ''

    def check_binary_without_lexer(name):
        # Lighter checks applied to contenttype.pyc and contenttype.pyo.
        location = os.path.join(extracted, name)
        assert is_binary(location)
        assert get_pygments_lexer(location) is None

    check_python_25_bytecode('command.pyc')
    check_binary_without_lexer('contenttype.pyc')
    check_binary_without_lexer('contenttype.pyo')
    check_python_25_bytecode('extract.pyc')
def test_config_linux_conf(self):
    """Check the types reported for the defconfig-ar531x-jffs2 config file.

    The file must be plain text, must not be treated as source code, and
    must not map to any Pygments file type.
    """
    test_file = self.get_test_loc('contenttype/config/defconfig-ar531x-jffs2')
    # This assertion used to be written twice in this test; once is enough.
    assert get_filetype(test_file) == 'linux make config build file (old)'
    assert not is_source(test_file)
    assert is_text(test_file)
    assert get_filetype_pygment(test_file) == ''
    assert get_mimetype_file(test_file) == 'text/plain'
def test_doc_xml(self):
    """Check that each sample XML document is non-binary XML language text."""
    xml_paths = (
        'contenttype/doc/xml/simple.xml',
        'contenttype/doc/xml/some.xml',
        'contenttype/doc/xml/somespring.xml',
    )
    for path in xml_paths:
        assert not is_binary(self.get_test_loc(path))
        assert get_filetype(self.get_test_loc(path)) == 'xml language text'
def test_doc_office_word(self):
    """Check the types reported for the sample Word documents."""
    word_2007 = 'microsoft word 2007+'
    document = self.get_test_loc('contenttype/doc/office/document')
    assert get_filetype(document) == word_2007
    document_doc = self.get_test_loc('contenttype/doc/office/document.doc')
    assert get_filetype(document_doc) == word_2007

    word_doc = self.get_test_loc('contenttype/doc/office/word.doc')
    assert not is_special(word_doc)
    assert get_filetype_pygment(word_doc) == ''
    assert get_mimetype_file(word_doc) == 'application/msword'
    assert get_filetype(word_doc).startswith(
        'composite document file v2 document')
    assert get_filetype_file(word_doc).startswith(
        'Composite Document File V2 Document')

    word_docx = self.get_test_loc('contenttype/doc/office/word.docx')
    assert get_mimetype_file(word_docx) == (
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document')
def test_binary_random_data(self):
    """Check that binary_random_0 through binary_random_8 are typed as 'data'."""
    for index in range(9):
        location = self.get_test_loc(
            'contenttype/binary-random/binary_random_%d' % index)
        assert get_filetype(location) == 'data'
def test_package_rpm(self):
    """Check that the wget RPM is a compressed binary archive without text."""
    rpm = self.get_test_loc('contenttype/package/wget-1.11.4-3.fc11.i586.rpm')
    assert get_filetype(rpm) == 'rpm v3.0 bin i386/x86_64'
    assert is_binary(rpm)
    assert is_archive(rpm)
    assert is_compressed(rpm)
    assert not contains_text(rpm)
def test_package_python_egg(self):
    """Check that the Python egg is a compressed binary zip archive without text."""
    egg = self.get_test_loc('contenttype/package/TicketImport-0.7a-py2.5.egg')
    assert get_filetype(egg) == 'zip archive data, at least v2.0 to extract'
    assert is_binary(egg)
    assert is_compressed(egg)
    assert is_archive(egg)
    assert not contains_text(egg)
def test_package_java_war(self):
    """Check that the WAR file is a compressed binary zip archive without text."""
    war = self.get_test_loc('contenttype/package/c.war')
    assert get_filetype(war) == 'zip archive data, at least v1.0 to extract'
    assert is_binary(war)
    assert is_compressed(war)
    assert is_archive(war)
    assert not contains_text(war)
def test_doc_office_word_3(self):
    """Check the types reported for the word.doc sample."""
    word_doc = self.get_test_loc('contenttype/doc/office/word.doc')
    assert not is_special(word_doc)
    assert get_filetype_pygment(word_doc) == ''
    assert get_mimetype_file(word_doc) == 'application/msword'
    assert get_filetype(word_doc).startswith(
        'composite document file v2 document')
    assert get_filetype_file(word_doc).startswith(
        'Composite Document File V2 Document')
def test_doc_pdf_1(self):
    """Check that a.pdf is a binary, non-media PDF 1.2 document with text."""
    pdf = self.get_test_loc('contenttype/doc/pdf/a.pdf')
    assert is_pdf(pdf)
    assert is_pdf_with_text(pdf)
    assert get_filetype(pdf) == 'pdf document, version 1.2'
    assert not is_media(pdf)
    assert is_binary(pdf)
def test_text_log(self):
    """Check that windowserver.log is plain ASCII text and not source code."""
    log_file = self.get_test_loc('contenttype/text/windowserver.log')
    assert not is_source(log_file)
    assert is_text(log_file)
    assert get_filetype_pygment(log_file) == ''
    assert get_filetype(log_file) == 'ascii text'
    assert get_filetype_file(log_file) == 'ASCII text'
    assert get_mimetype_file(log_file) == 'text/plain'
def test_package_java_jar_as_zip(self):
    """Check that ant.zip is typed as a JAR: compressed binary archive, no text."""
    archive = self.get_test_loc('contenttype/package/ant.zip')
    assert get_filetype(archive) == 'java archive data (jar)'
    assert is_binary(archive)
    assert is_compressed(archive)
    assert is_archive(archive)
    assert not contains_text(archive)
def test_text_license_copying(self):
    """Check that the COPYING file is plain ASCII text and not source code."""
    copying = self.get_test_loc('contenttype/text/COPYING')
    # Substring match on purpose: only 'ascii text' has to appear in the type.
    assert 'ascii text' in get_filetype(copying)
    assert not is_source(copying)
    assert is_text(copying)
    assert get_filetype_pygment(copying) == ''
    assert get_mimetype_file(copying) == 'text/plain'
def test_package_debian(self):
    """Check that the wget-el .deb is a binary Debian archive without text."""
    deb = self.get_test_loc('contenttype/package/wget-el_0.5.0-8_all.deb')
    assert get_filetype(deb) == 'debian binary package (format 2.0)'
    assert is_binary(deb)
    assert is_archive(deb)
    assert not contains_text(deb)
def test_code_java_non_ascii(self):
    """Check that ChartTiming1.java is still detected as Java source text."""
    java_file = self.get_test_loc('contenttype/code/java/ChartTiming1.java')
    assert is_source(java_file)
    assert is_text(java_file)
    # FIXME: incorrect
    assert get_mimetype_file(java_file) == 'application/octet-stream'
    assert get_filetype_pygment(java_file) == 'Java'
    assert get_filetype(java_file) == 'java language text'
def test_package_rubygem(self):
    """Check that the gem is a compressed binary tar archive without text."""
    gem = self.get_test_loc('contenttype/package/rubygems-update-1.4.1.gem')
    assert get_filetype(gem) == 'posix tar archive'
    assert is_binary(gem)
    assert is_compressed(gem)
    assert is_archive(gem)
    assert not contains_text(gem)
def test_archive_tar_bz2(self):
    """Check that e.tar.bz2 is a bzip2-compressed binary archive without text."""
    tarball = self.get_test_loc('contenttype/archive/e.tar.bz2')
    assert is_binary(tarball)
    assert is_archive(tarball)
    assert get_filetype(tarball) == 'bzip2 compressed data, block size = 900k'
    assert is_compressed(tarball)
    assert not contains_text(tarball)
def test_compiled_python_4(self):
    """Check detection of the byte-compiled extract.pyc from compiled.zip.

    The file must be reported as Python 2.5 bytecode, must be neither source
    nor text, and must not map to any Pygments file type.
    """
    test_dir = self.extract_test_zip('contenttype/compiled/python/compiled.zip')
    test_file = os.path.join(test_dir, 'extract.pyc')
    # Same accepted values as test_compiled_python_1 uses for this very file:
    # the mime type reported for bytecode depends on the libmagic version.
    expected_mime = (
        'application/octet-stream',
        # libmagic 5.39
        'text/x-bytecode.python',
    )
    assert get_filetype(test_file) == 'python 2.5 byte-compiled'
    assert not is_source(test_file)
    assert not is_text(test_file)
    assert get_mimetype_file(test_file) in expected_mime
    assert get_filetype_pygment(test_file) == ''
def test_archive_tar_posix(self):
    """Check that posixnotgnu.tar is an uncompressed binary tar archive."""
    tarball = self.get_test_loc('contenttype/archive/posixnotgnu.tar')
    assert is_binary(tarball)
    assert is_archive(tarball)
    assert get_filetype(tarball) == 'posix tar archive'
    assert not is_compressed(tarball)
    assert not contains_text(tarball)
def test_archive_tar_gz_3(self):
    """Check that e.tar.gz is a gzip-compressed binary archive without text."""
    tarball = self.get_test_loc('contenttype/archive/e.tar.gz')
    assert is_binary(tarball)
    assert is_archive(tarball)
    # Only the prefix of the reported type is checked.
    assert get_filetype(tarball).startswith('gzip compressed data')
    assert is_compressed(tarball)
    assert not contains_text(tarball)
def test_ar_archive_win_library(self):
    """Check that win-archive.lib is an uncompressed binary ar archive."""
    library = self.get_test_loc('contenttype/archive/win-archive.lib')
    assert is_binary(library)
    assert is_archive(library)
    assert get_filetype(library) == 'current ar archive'
    assert not is_compressed(library)
    assert not contains_text(library)
def test_doc_html_3(self):
    """Check that Label.html is detected as HTML source text."""
    html = self.get_test_loc('contenttype/doc/html/Label.html')
    assert is_source(html)
    assert is_text(html)
    assert get_filetype_pygment(html) == 'HTML'
    assert get_filetype(html) == 'html language text'
    assert get_mimetype_file(html) == 'text/html'
    assert get_filetype_file(html) == (
        'HTML document, ASCII text, with very long lines')
def test_code_cpp_non_ascii(self):
    """Check that non_ascii.cpp is still detected as C++ source text."""
    cpp_file = self.get_test_loc('contenttype/code/cpp/non_ascii.cpp')
    assert is_source(cpp_file)
    assert is_text(cpp_file)
    assert get_mimetype_file(cpp_file) == 'application/octet-stream'
    assert get_filetype_pygment(cpp_file) == 'C++'
    assert get_filetype(cpp_file) == 'c++ language text'
def test_code_python_2(self):
    """Check that extract.py is detected as Python source text."""
    script = self.get_test_loc('contenttype/code/python/extract.py')
    assert is_source(script)
    assert is_text(script)
    assert get_filetype_pygment(script) == 'Python'
    assert get_filetype(script) == 'python language text'
    assert get_mimetype_file(script) == 'text/x-python'
    # Only the prefix of the reported type is checked.
    assert get_filetype_file(script).startswith('Python script')
def test_doc_html_2(self):
    """Check that allclasses-frame.html is detected as HTML source text."""
    html = self.get_test_loc('contenttype/doc/html/allclasses-frame.html')
    assert is_source(html)
    assert is_text(html)
    assert get_filetype_pygment(html) == 'HTML'
    assert get_filetype(html) == 'html language text'
    assert get_mimetype_file(html) == 'text/html'
    assert get_filetype_file(html) == 'HTML document, ASCII text'
def test_code_c_6(self):
    """Check that pci_v3.c is detected as C source text."""
    c_file = self.get_test_loc('contenttype/code/c/pci_v3.c')
    assert is_source(c_file)
    assert is_text(c_file)
    assert get_filetype_file(c_file) == 'C source, ASCII text'
    assert get_filetype_pygment(c_file) == 'C'
    assert get_filetype(c_file) == 'c language text'
    assert get_mimetype_file(c_file) == 'text/x-c'
def test_code_cpp_1(self):
    """Check that stacktrace.cpp is detected as C++ source text."""
    cpp_file = self.get_test_loc('contenttype/code/cpp/stacktrace.cpp')
    assert is_source(cpp_file)
    assert is_text(cpp_file)
    assert get_filetype_pygment(cpp_file) == 'C++'
    assert get_filetype(cpp_file) == 'c++ language text'
    assert get_mimetype_file(cpp_file) == 'text/x-c'
def test_build_makefile(self):
    """Check that the sample Makefile is detected as makefile source text."""
    makefile = self.get_test_loc('contenttype/build/Makefile')
    assert is_source(makefile)
    assert is_text(makefile)
    assert get_filetype_pygment(makefile) == 'Makefile'
    assert get_filetype_file(makefile) == 'ASCII text'
    assert get_filetype(makefile) == 'makefile language text'
    assert get_mimetype_file(makefile) == 'text/plain'
def test_debian_package(self):
    """Check that the libjama-dev .deb is a binary Debian archive without text."""
    deb = self.get_test_loc('contenttype/package/libjama-dev_1.2.4-2_all.deb')
    assert get_filetype(deb) == 'debian binary package (format 2.0)'
    assert is_binary(deb)
    assert is_archive(deb)
    assert not contains_text(deb)
def test_code_c_4(self):
    """Check that mm.c is detected as C source text."""
    c_file = self.get_test_loc('contenttype/code/c/mm.c')
    assert is_source(c_file)
    assert is_text(c_file)
    assert get_filetype_pygment(c_file) == 'C'
    assert get_filetype(c_file) == 'c language text'
    assert get_mimetype_file(c_file) == 'text/x-c'
def test_doc_html_2(self):
    """Check that allclasses-frame.html is detected as HTML source text."""
    html = self.get_test_loc('contenttype/doc/html/allclasses-frame.html')
    assert is_source(html)
    assert is_text(html)
    assert get_filetype_pygment(html) == 'HTML'
    assert get_filetype(html) == 'html language text'
    assert get_mimetype_file(html) == 'text/html'
    assert get_filetype_file(html) == 'HTML document, ASCII text'
def test_code_groff(self):
    """Check that example.ms is detected as groff text with a troff mime type."""
    groff = self.get_test_loc(u'contenttype/code/groff/example.ms')
    assert not is_special(groff)
    assert is_text(groff)
    assert get_filetype_pygment(groff) == 'Groff'
    assert get_filetype(groff) == 'groff language text'
    assert get_mimetype_python(groff) == 'text/troff'
    assert get_mimetype_file(groff) == 'text/troff'
    # Only the prefix of the reported type is checked.
    assert get_filetype_file(groff).startswith('troff or preprocessor input')
def test_doc_html_3(self):
    """Check that Label.html is detected as HTML source text."""
    html = self.get_test_loc('contenttype/doc/html/Label.html')
    assert is_source(html)
    assert is_text(html)
    assert get_filetype_pygment(html) == 'HTML'
    assert get_filetype(html) == 'html language text'
    assert get_mimetype_file(html) == 'text/html'
    assert get_filetype_file(html) == (
        'HTML document, ASCII text, with very long lines')
def test_code_java_non_ascii(self):
    """Check that ChartTiming1.java is still detected as Java source text."""
    java_file = self.get_test_loc('contenttype/code/java/ChartTiming1.java')
    assert is_source(java_file)
    assert is_text(java_file)
    # FIXME: incorrect
    assert get_mimetype_file(java_file) == 'application/octet-stream'
    assert get_filetype_pygment(java_file) == 'Java'
    assert get_filetype(java_file) == 'java language text'
def test_code_python_2(self):
    """Check that extract.py is detected as Python source text."""
    script = self.get_test_loc('contenttype/code/python/extract.py')
    assert is_source(script)
    assert is_text(script)
    assert get_filetype_pygment(script) == 'Python'
    assert get_filetype(script) == 'python language text'
    assert get_mimetype_file(script) == 'text/x-python'
    # Only the prefix of the reported type is checked.
    assert get_filetype_file(script).startswith('Python script')
def test_text_log(self):
    """Check that windowserver.log is plain ASCII text and not source code."""
    log_file = self.get_test_loc('contenttype/text/windowserver.log')
    assert not is_source(log_file)
    assert is_text(log_file)
    assert get_filetype_pygment(log_file) == ''
    assert get_filetype(log_file) == 'ascii text'
    assert get_filetype_file(log_file) == 'ASCII text'
    assert get_mimetype_file(log_file) == 'text/plain'
def test_build_makefile(self):
    """Check that the sample Makefile is detected as makefile source text."""
    makefile = self.get_test_loc('contenttype/build/Makefile')
    assert is_source(makefile)
    assert is_text(makefile)
    assert get_filetype_pygment(makefile) == 'Makefile'
    assert get_filetype_file(makefile) == 'ASCII text'
    assert get_filetype(makefile) == 'makefile language text'
    assert get_mimetype_file(makefile) == 'text/plain'
def test_contenttype_fifo(self):
    """Check that a freshly created named pipe is reported as a special FIFO."""
    import subprocess

    fifo_path = os.path.join(self.get_temp_dir(), 'myfifo')
    # The pipe is created with the external `mkfifo` command; a non-zero
    # exit status fails the test right away.
    exit_status = subprocess.call(['mkfifo', fifo_path])
    if exit_status != 0:
        self.fail('Unable to create fifo')
    assert os.path.exists(fifo_path)
    assert is_special(fifo_path)
    assert get_filetype(fifo_path) == 'FIFO pipe'
def test_code_groff(self):
    """Check that example.ms is detected as groff text with no Python mime type."""
    groff = self.get_test_loc(u'contenttype/code/groff/example.ms')
    assert not is_special(groff)
    assert is_text(groff)
    assert get_filetype_pygment(groff) == 'Groff'
    assert get_filetype(groff) == 'groff language text'
    # The Apache mimes do not have .ms in their types,
    # but the type is still mysteriously returned on Windows.
    assert get_mimetype_python(groff) == ''
    assert get_mimetype_file(groff) == 'text/troff'
    # Only the prefix of the reported type is checked.
    assert get_filetype_file(groff).startswith('troff or preprocessor input')
def test_archive_tar_gz_1(self):
    """Check that a.tar.gz is a gzip-compressed archive, not source or text."""
    tarball = self.get_test_loc('contenttype/archive/a.tar.gz')
    assert not is_source(tarball)
    assert not is_text(tarball)
    assert get_filetype_pygment(tarball) == ''
    assert get_mimetype_file(tarball) == 'application/x-gzip'
    # Only the prefix of the reported type is checked.
    assert get_filetype(tarball).startswith('gzip compressed data')
    assert is_archive(tarball)
    assert is_compressed(tarball)
    assert not contains_text(tarball)
def test_text_license_credits(self):
    """Pin the types currently reported for the CREDITS text file.

    Several expectations are marked FIXME below: they record what detection
    returns today and are flagged as incorrect.
    """
    credits_file = self.get_test_loc('contenttype/text/CREDITS')
    # FIXME
    assert get_filetype(credits_file) == 'css+lasso language text'
    assert is_text(credits_file)
    # FIXME: incorrect
    assert is_source(credits_file)
    # FIXME: incorrect
    assert get_filetype_pygment(credits_file) == 'CSS+Lasso'
    assert get_filetype_file(credits_file) == 'ISO-8859 text'
    assert get_mimetype_file(credits_file) == 'text/plain'
def test_code_c_7(self):
    """Check that some.c is C source under either accepted libmagic result."""
    c_file = self.get_test_loc('contenttype/code/c/some.c')
    accepted_filetypes = (
        # incorrect, up to libmagic 5.38
        'ti-xx graphing calculator (flash)',
        # correct in libmagic 5.39+
        'c source, ascii text',
    )
    assert get_filetype(c_file) in accepted_filetypes
    assert is_source(c_file)
    assert 'C' == get_filetype_pygment(c_file)
def test_text_license_credits(self):
    """Pin the types currently reported for the CREDITS text file.

    Several expectations are marked FIXME below: they record what detection
    returns today and are flagged as incorrect.
    """
    # FIXME
    credits_file = self.get_test_loc('contenttype/text/CREDITS')
    assert get_filetype(credits_file) == 'css+lasso language text'
    assert is_text(credits_file)
    # FIXME: incorrect
    assert is_source(credits_file)
    # FIXME: incorrect
    assert get_filetype_pygment(credits_file) == 'CSS+Lasso'
    assert get_filetype_file(credits_file) == 'ISO-8859 text'
    assert get_mimetype_file(credits_file) == 'text/plain'
def test_text_license_copying(self):
    """Check that the COPYING file is plain ASCII text and not source code."""
    copying = self.get_test_loc('contenttype/text/COPYING')
    # Substring match on purpose: only 'ascii text' has to appear in the type.
    assert 'ascii text' in get_filetype(copying)
    assert not is_source(copying)
    assert is_text(copying)
    assert get_filetype_pygment(copying) == ''
    assert get_mimetype_file(copying) == 'text/plain'
def test_debug_win_pdb(self):
    """Check that QTMovieWin.pdb is a binary MSVC program database."""
    pdb_file = self.get_test_loc('contenttype/debug/QTMovieWin.pdb')
    assert is_binary(pdb_file)
    # The expected type ends with a literal backslash followed by "004".
    assert get_filetype(pdb_file) == 'msvc program database ver \\004'
def test_script_windows_bat(self):
    """Check that both sample .bat scripts are typed as batchfile text."""
    bat_paths = (
        'contenttype/script/build_w32vc.bat',
        'contenttype/script/zip_src.bat',
    )
    for path in bat_paths:
        assert get_filetype(self.get_test_loc(path)) == 'batchfile language text'
def test_script_install(self):
    """Check that the extensionless install file is typed as ASCII text."""
    install = self.get_test_loc('contenttype/script/install')
    assert get_filetype(install) == 'ascii text'
def test_package_rubygem(self):
    """Check that the rubygems-update gem is typed as a POSIX tar archive."""
    gem = self.get_test_loc('contenttype/package/rubygems-update-1.4.1.gem')
    assert get_filetype(gem) == 'posix tar archive'
def test_script_bash(self):
    """Check that test.sh is typed as bash language text."""
    script = self.get_test_loc('contenttype/script/test.sh')
    assert get_filetype(script) == 'bash language text'
def test_package_python_egg(self):
    """Check that the Python egg is a binary zip archive."""
    egg = self.get_test_loc('contenttype/package/TicketImport-0.7a-py2.5.egg')
    assert is_binary(egg)
    assert is_archive(egg)
    assert get_filetype(egg) == 'zip archive data, at least v2.0 to extract'
def test_package_rpm(self):
    """Check the file type reported for the wget RPM package."""
    rpm = self.get_test_loc('contenttype/package/wget-1.11.4-3.fc11.i586.rpm')
    assert get_filetype(rpm) == 'rpm v3.0 bin i386/x86_64'
def test_package_java_jar_as_zip(self):
    """Check that ant.zip is a binary archive typed as a JAR."""
    archive = self.get_test_loc('contenttype/package/ant.zip')
    assert is_binary(archive)
    assert is_archive(archive)
    assert get_filetype(archive) == 'java archive data (jar)'
def test_package_java_war(self):
    """Check that c.war is a binary zip archive."""
    war = self.get_test_loc('contenttype/package/c.war')
    assert is_binary(war)
    assert is_archive(war)
    assert get_filetype(war) == 'zip archive data, at least v1.0 to extract'
def test_package_java_jar(self):
    """Check that ant-jsch-1.7.0.jar is a binary archive typed as a JAR."""
    jar = self.get_test_loc('contenttype/package/ant-jsch-1.7.0.jar')
    assert is_binary(jar)
    assert is_archive(jar)
    assert get_filetype(jar) == 'java archive data (jar)'
def test_package_debian(self):
    """Check the file type reported for the wget-el Debian package."""
    deb = self.get_test_loc('contenttype/package/wget-el_0.5.0-8_all.deb')
    assert get_filetype(deb) == 'debian binary package (format 2.0)'
def test_media_image_bmp_2(self):
    """Check the exact file type strings reported for the two sample BMP images."""
    expected_by_path = (
        ('contenttype/media/TBarLrge.bmp',
         'pc bitmap, windows 3.x format, 400 x 32 x 4'),
        ('contenttype/media/TBarMedm.bmp',
         'pc bitmap, windows 3.x format, 210 x 16 x 4'),
    )
    for path, expected_type in expected_by_path:
        assert get_filetype(self.get_test_loc(path)) == expected_type
def test_doc_pdf_1(self):
    """Check that a.pdf is a binary, non-media PDF 1.2 document with text."""
    pdf = self.get_test_loc('contenttype/doc/pdf/a.pdf')
    assert is_pdf(pdf)
    assert is_pdf_with_text(pdf)
    assert get_filetype(pdf) == 'pdf document, version 1.2'
    assert not is_media(pdf)
    assert is_binary(pdf)
def test_doc_pdf_3(self):
    """Check that pdf.pdf is a PDF 1.4 document with text."""
    pdf = self.get_test_loc('contenttype/doc/pdf/pdf.pdf')
    assert is_pdf(pdf)
    assert is_pdf_with_text(pdf)
    assert get_filetype(pdf) == 'pdf document, version 1.4'