def _clone(args, image, context=os):
    entrypoints = []
    for entrypoint in args.entrypoints:
        if context.path.isdir(entrypoint):
            entrypoints.append(entrypoint)
        else:
            logger.error(
                "Entrypoint '{}' is not a Directory".format(entrypoint))

    if not entrypoints:
        logger.critical("No valid Entrypoints Found!")
        logger.critical("Exiting...")
        sys.exit(-1)

    crawler = BaseCrawler(entrypoints, args.excluded_dirs, image=context)
    load_extension_list(args.load_extensions)

    pool = Pool(args.threads)
    pool.starmap(
        # Each call receives: image, (filename, filetype), context
        file_worker,
        zip(itertools.repeat(image), crawler(), itertools.repeat(context)),
    )
    pool.close()
    pool.join()

    logger.info("Processed {} files".format(PROCESSED_FILES))
    logger.info("Image Generated!")

    image_serializer = SatoriJsoner()
    # image_serializer = SatoriPickler()
    image_serializer.write(image, args.image_file)
    logger.warning("Stored to file '{}'".format(image_serializer.last_file))
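# A minimal, standalone sketch of the fan-out pattern used by _clone() above
# (the name _demo_worker and the literal values are illustrative only):
# zip() pairs a repeated shared value with each (filename, filetype) tuple
# produced by the crawler, and Pool.starmap unpacks every zipped tuple into
# the worker's positional arguments.
#
#     import itertools
#     from multiprocessing import Pool
#
#     def _demo_worker(shared, item, ctx):
#         return (shared, item, ctx)
#
#     if __name__ == '__main__':
#         with Pool(2) as pool:
#             calls = pool.starmap(
#                 _demo_worker,
#                 zip(itertools.repeat('image'),
#                     [('a.txt', 'REG'), ('b.txt', 'REG')],
#                     itertools.repeat('os')))
#         # calls == [('image', ('a.txt', 'REG'), 'os'),
#         #           ('image', ('b.txt', 'REG'), 'os')]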
def file_worker(image, file_desc, context=os):
    global PROCESSED_FILES
    PROCESSED_FILES += 1

    filename, filetype = file_desc
    image.add_file(filename)

    # Fire the pre-open hooks for every crawled path
    func = EVENTS["imager.pre_open"]
    func(
        satori_image=image,
        file_path=filename,
        file_type=filetype,
        os_context=context,
    )

    if filetype is not SE.DIRECTORY_T:
        # Only open the file if at least one with-open hook is registered
        if len(EVENTS["imager.with_open"]):
            try:
                fd = context.open(filename, 'rb')
                func = EVENTS["imager.with_open"]
                func(
                    satori_image=image,
                    file_path=filename,
                    file_type=filetype,
                    fd=fd,
                )
                fd.close()

                func = EVENTS["imager.post_close"]
                func(
                    satori_image=image,
                    file_path=filename,
                    file_type=filetype,
                    os_context=context,
                )
            except Exception as e:
                logger.info(
                    "%s. File '%s' could not be opened." % (e, filename))
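# Note: file_worker runs inside pool processes, so the `global` increment
# above updates each child's copy of PROCESSED_FILES, not the parent's.
# If an exact cross-process count is needed, a shared counter is one option
# (a sketch under that assumption, not the module's current behaviour;
# the Value is inherited by workers on fork-based platforms):
#
#     from multiprocessing import Value
#
#     PROCESSED_FILES = Value('i', 0)  # shared integer with its own lock
#
#     def file_worker(image, file_desc, context=os):
#         with PROCESSED_FILES.get_lock():
#             PROCESSED_FILES.value += 1
#         ...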
def get_image_context_from_arg(arg, allow_local=True):
    from satoricore.file import load_image

    if arg == '.':
        return dummy_context(os)

    if allow_local:
        try:
            os.stat(arg)
            logger.info("Found local file '{}'".format(arg))
            image_path = arg
            source = load_image(image_path)
            if source is not None:
                return dummy_context(source)
        except FileNotFoundError:
            logger.error("File '{}' could not be found".format(arg))

    try:
        import satoriremote
        logger.info("Connecting to '{}'".format(arg))
        conn_context_source, conn_dict = satoriremote.connect(arg)
        logger.warning("Connected to {}".format(conn_dict['host']))
        return conn_context_source
    except ImportError:
        logger.critical(
            "'satori-remote' package not available, remote paths can't be used"
        )
        sys.exit(-1)
    except ValueError:
        # The argument could not be parsed as a URI
        logger.critical("'{}' can't be parsed as URI".format(arg))
        sys.exit(-1)
    except ConnectionError:
        logger.critical("Connection failed for path '{}'".format(arg))
        sys.exit(-1)
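# get_image_context_from_arg() always returns something usable in a `with`
# statement (see main() below): satoriremote.connect() hands back a context
# manager, and local results are wrapped in dummy_context. If dummy_context
# is not defined elsewhere, a minimal version could be built with contextlib
# (a sketch; the real helper may differ):
#
#     import contextlib
#
#     @contextlib.contextmanager
#     def dummy_context(obj):
#         yield obj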
def set_diff_meta(parser, args, source, destination, results, diff_name):
    diff_meta = DiffMeta(source, destination)
    results.add_class(diff_name, section=_DIFFS_SECTION, data=diff_meta)
    # pprint(diff_meta)
    logger.info("DIFF metadata added for '{}'".format(diff_name))
def main():
    parser = _setup_argument_parser()
    args = parser.parse_args()

    source_context = get_image_context_from_arg(args.original_image)
    logger.warning("Loaded image '{}'".format(args.original_image))
    destination_context = get_image_context_from_arg(args.tested_image)
    logger.warning("Loaded image '{}'".format(args.tested_image))

    try:
        results = load_image(args.output)
        logger.warning(
            "SatoriImage '{}' loaded to archive results".format(args.output))
    except TypeError:
        logger.warning("No output image selected")
        logger.info("Using an empty SatoriImage to store results")
        results = SatoriImage()
    except ValueError:
        logger.error(
            "Output image file '{}' is not a SatoriImage".format(args.output))
        logger.warning("Using an empty SatoriImage to store results")
        results = SatoriImage()

    assert results is not None

    try:
        logger.info("Adding DIFF section in SatoriImage")
        results.add_section(_DIFFS_SECTION)
    except KeyError:
        logger.warning("DIFF section in SatoriImage already exists")

    existing_diffs = results.get_classes(_DIFFS_SECTION)
    if existing_diffs:
        logger.info(
            "Existing DIFFs in SatoriImage: {}".format(str(existing_diffs)))

    name = get_diff_name(existing_diffs)
    global DIFF_NAME
    DIFF_NAME = name
    logger.warning("New DIFF name is '{}'".format(name))

    with source_context as source:
        with destination_context as destination:
            if not args.entrypoints:
                # Fall back to the filesystem root if an image does not
                # declare its entrypoints
                s_epoints = set('/')
                try:
                    s_epoints = source.get_entrypoints()
                    logger.info(
                        "Original image entrypoints: {}".format(s_epoints))
                except Exception:
                    logger.warning(
                        "Entrypoints for source cannot be determined.")

                d_epoints = set('/')
                try:
                    d_epoints = destination.get_entrypoints()
                    logger.info(
                        "Tested image entrypoints: {}".format(d_epoints))
                except Exception:
                    logger.warning(
                        "Entrypoints for destination cannot be determined.")

                common_entrypoints = s_epoints & d_epoints
                if not common_entrypoints:
                    logger.critical("No common entrypoints found. Exiting...")
                    sys.exit(-1)
                logger.info(
                    "Common entrypoints are {}".format(
                        str(common_entrypoints)))
                args.entrypoints = common_entrypoints

            logger.warning(
                "Operating on entrypoints: {}".format(str(args.entrypoints)))

            EVENTS['differ.on_start'](
                parser=parser,
                args=args,
                source=source,
                destination=destination,
                results=results,
                diff_name=DIFF_NAME,
            )

            logger.warning("Diff process started...")
            diff_images(source, destination, args.entrypoints, results)
            logger.warning("Diff process finished!")

    if not args.output:
        args.output = DIFF_NAME

    image_serializer = SatoriJsoner()
    # image_serializer = SatoriPickler()
    if args.output.endswith(image_serializer.suffix):
        image_serializer.suffix = ''
    image_serializer.write(results, args.output)
    logger.warning("Stored to file '{}'".format(image_serializer.last_file))

    EVENTS['differ.on_end'](results)
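# The entrypoint fallback above leans on two set details worth spelling out:
# set('/') builds a set from the characters of the string, which for the
# one-character string '/' is simply {'/'}, and `&` intersects the two sets:
#
#     >>> set('/')
#     {'/'}
#     >>> {'/', '/var'} & {'/', '/etc'}
#     {'/'}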
def _iter_entrypoints(self):
    for entrypoint in self.entrypoints:
        # Yield all directories leading up to the entrypoint:
        # Entrypoint: /var/www/html
        # Becomes: /, /var, /var/www, /var/www/html
        entry_parts = pathlib.PurePath(entrypoint).parts
        to_yield_parts = []
        entry_path_construct = pathlib.PurePath()
        for entry_part in entry_parts:
            entry_path_construct /= entry_part
            to_yield_parts.append(str(entry_path_construct))
        yield (to_yield_parts, [])

        # Use a list as a queue of folder paths to crawl; directories
        # discovered below are appended to it while it is being iterated
        _folder_list = [entrypoint]
        for _folder_consume in _folder_list:
            root_path = _folder_consume
            dirs = []
            files = []

            # For every folder to crawl, get its contents
            try:
                _folder_consume_contents = self.image.listdir(_folder_consume)
            except PermissionError:
                # If listing fails, just skip the directory
                logger.info(
                    "Directory '{}' could not be listed".format(
                        _folder_consume))
                continue
            except FileNotFoundError:
                logger.info(
                    "Directory '{}' could not be found".format(
                        _folder_consume))
                continue

            for _file in _folder_consume_contents:
                # Construct the full path of each file
                file_full_path = self.image.path.join(root_path, _file)

                # If the file/folder is excluded, ignore it
                if file_full_path in self.excluded_dirs:
                    continue

                # By default, treat it as a regular file
                list_to_append = files
                try:
                    mode = self.image.lstat(file_full_path).st_mode
                except FileNotFoundError:
                    logger.info(
                        "File '{}' could not be found".format(file_full_path))
                    continue
                if S_ISDIR(mode):
                    # It is a directory: queue it to be crawled later
                    list_to_append = dirs
                    _folder_list.append(file_full_path)
                list_to_append.append(file_full_path)

            # Yield all entries collected from the consumed directory
            yield (dirs, files)
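# Two Python details carry _iter_entrypoints(): PurePath.parts splits a path
# into its components, and iterating over a list picks up items appended
# during the iteration, so _folder_list doubles as a breadth-first queue.
# A standalone illustration (the paths are made up for the example):
#
#     >>> pathlib.PurePath('/var/www/html').parts
#     ('/', 'var', 'www', 'html')
#
#     queue = ['/']
#     for folder in queue:
#         if folder == '/':
#             queue.extend(['/var', '/etc'])  # subdirectories found here
#         elif folder == '/var':
#             queue.append('/var/www')
#     # visit order: '/', '/var', '/etc', '/var/www'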