Example #1
0
    def moveProcessedFiles(self):
        """Relocate finished hash folders out of the hd2 data area.

        COMPLETED entries move to hd2_processed. FAILED entries move to
        hd2_not_processed only when they hit the known engine-decode error
        on an .xml/.json original. Any pre-existing destination folder is
        wiped first so the move never collides.
        """
        for file_key, entry in self.hash_json.data().items():
            source_path = self.storage.hd2_data(file_key)
            file_status = entry["file_status"]

            if file_status == FileStatus.COMPLETED:
                destination = self.storage.hd2_processed(file_key)
                if folder_exists(destination):
                    folder_delete_all(destination)
                shutil.move(source_path, destination)

            elif file_status == FileStatus.FAILED:
                meta_service = Metadata_Service()
                meta_service.get_from_file(source_path)
                metadata = meta_service.metadata
                is_decode_error = metadata.get_error() == "Engine response could not be decoded"
                if is_decode_error and metadata.get_original_file_extension() in ['.xml', '.json']:
                    destination = self.storage.hd2_not_processed(file_key)
                    if folder_exists(destination):
                        folder_delete_all(destination)
                    shutil.move(source_path, destination)
Example #2
0
    def __init__(self):
        """Wire up the collaborators used by the processing pipeline."""
        self.config = Config()
        self.meta_service = Metadata_Service()
        self.storage = Storage()
        self.file_name = None  # set in process() method
        self.current_path = None
        self.base_folder = None
        self.dst_folder = None
        self.dst_file_name = None

        # BUG FIX: Status() was instantiated twice and the first instance
        # immediately discarded; keep a single instance and reset it so the
        # counters start from a clean slate.
        self.status = Status()
        self.status.reset()
Example #3
0
    def get_file_analysis(self, index, report_json):
        """Build the file-analysis dict for the file identified by *index*.

        Combines the on-disk metadata (looked up via Metadata_Service with
        the file hash *index*) with the Glasswall *report_json*. Stores the
        result on self.file_analysis_data and returns it, or returns None
        after logging when anything goes wrong.
        """
        try:
            metadata = Metadata_Service().get_from_file(index)

            # Hoist the deeply nested report lookup used twice below.
            doc_summary = (report_json["gw:GWallInfo"]
                                      ["gw:DocumentStatistics"]
                                      ["gw:DocumentSummary"])

            self.file_analysis_data = {}
            self.file_analysis_data["file_name"]              = metadata.data.get('file_name')
            self.file_analysis_data["rebuild_file_extension"] = metadata.data.get('rebuild_file_extension')
            self.file_analysis_data["rebuild_file_size"]      = metadata.data.get('rebuild_file_size')

            self.file_analysis_data["original_hash"] = index
            self.file_analysis_data["rebuild_hash"]  = metadata.data.get('rebuild_hash')

            self.file_analysis_data["file_type"] = doc_summary["gw:FileType"]
            self.file_analysis_data["file_size"] = int(doc_summary["gw:TotalSizeInBytes"])

            self.file_analysis_data["remediated_item_count"], \
            self.file_analysis_data["remediate_items_list"]       = self.get_remediated_item_details(report_json)

            self.file_analysis_data["sanitised_item_count"], \
            self.file_analysis_data["sanitised_items_list"]       = self.get_sanitisation_item_details(report_json)

            self.file_analysis_data["issue_item_count"],\
            self.file_analysis_data["issue_item_list"]            = self.get_issue_item_details(report_json)

            self.file_analysis_data["threat_analysis"] = self.get_threat_analysis(
                self.file_analysis_data["sanitised_items_list"])

            return self.file_analysis_data

        except Exception as error:
            # BUG FIX: the original message interpolated the builtin `dir`
            # function instead of the file hash `index`.
            log_error(
                message=
                f"Error in get_file_analysis from json data {index} : {error}")
 def setUpClass(cls) -> None:
     """One-off fixture: a throwaway PDF plus the services under test."""
     super().setUpClass()
     cls.test_file      = Test_Data().create_test_pdf(text=random_text(prefix="some random text: "))
     cls.test_file_name = file_name(cls.test_file)
     cls.config         = Config()
     cls.meta_service   = Metadata_Service()
     cls.metadata       = cls.meta_service.create_metadata(cls.test_file)
     cls.analysis_json  = Analysis_Json()
    def setUpClass(cls) -> None:
        """Fixture wiring a Metadata_Service to a test Elastic backend.

        Skips the whole test class when no Elastic server is reachable.
        """
        super().setUpClass()
        cls.test_file = temp_file(contents='Static text so that we have a static hash')
        # sha256 of the static contents above.
        cls.file_hash = '500286533bf75d769e9180a19414d1c3502dd52093e7351a0a9b1385d8f8961c'

        cls.metadata_elastic = Metadata_Elastic()
        Setup_Testing().configure_metadata_elastic(cls.metadata_elastic)

        cls.metadata_service = Metadata_Service()
        cls.metadata_service.metadata_elastic = cls.metadata_elastic

        # Identity check kept from the original: only an explicit False skips.
        if cls.metadata_elastic.enabled is False:
            pytest.skip('Elastic server not available')
Example #6
0
    def updateHashJson(self):
        """Rebuild hash.json from the folders currently under hd2_data.

        Every hash folder whose metadata says the rebuild is not yet
        COMPLETED is re-queued. Persists the rebuilt index, refreshes the
        processing counters, and returns the new mapping.
        """
        self.hash_json.reset()
        meta_service = Metadata_Service()

        for entry_name in os.listdir(self.storage.hd2_data()):
            entry_folder = self.storage.hd2_data(entry_name)
            if not os.path.isdir(entry_folder):
                continue  # skip stray files in the data root

            metadata = meta_service.get_from_file(entry_folder)
            queued_name = metadata.get_file_name()
            queued_hash = metadata.get_original_hash()

            if metadata.get_rebuild_status() == FileStatus.COMPLETED:
                continue  # already done, nothing to re-queue
            self.hash_json.add_file(queued_hash, queued_name)

        self.hash_json.save()
        self.status.set_processing_counters(len(self.hash_json.data()))
        return self.hash_json.data()
Example #7
0
    def ProcessDirectory(self, thread_data):
        """Thread worker: rebuild one hash folder via a round-robin endpoint.

        *thread_data* is an (itempath, file_hash, process_index) triple; the
        endpoint is chosen by process_index modulo the endpoint count.
        Returns the boolean outcome of ProcessDirectoryWithEndpoint, or
        False immediately when the processing loop has been told to stop.
        """
        itempath, file_hash, process_index = thread_data
        endpoint_index = process_index % self.config.endpoints_count

        if not Loops.continue_processing:
            return False

        started_at = datetime.now()
        succeeded = self.ProcessDirectoryWithEndpoint(itempath, file_hash,
                                                      endpoint_index)
        if not succeeded:
            self.status.add_failed()
            return succeeded

        self.status.add_completed()
        elapsed = datetime.now() - started_at
        Metadata_Service().set_hd2_to_hd3_copy_time(itempath,
                                                    elapsed.total_seconds())
        return succeeded
Example #8
0
    def ProcessDirectoryWithEndpoint(self, itempath, file_hash, endpoint_index):
        """Run the rebuild of one hash folder against a specific endpoint.

        Records the outcome in the folder's metadata, its events log and
        hash.json. Returns True on a completed rebuild; False when the path
        is not a folder, the endpoint refuses the folder, or an exception
        is raised during processing.
        """
        if not os.path.isdir(itempath):
            return False

        log_info(
            message=
            f"Starting ProcessDirectoryWithEndpoint on endpoint # {endpoint_index} for file {file_hash}"
        )
        meta_service = Metadata_Service()
        original_file_path = meta_service.get_original_file_paths(itempath)
        events = Events_Log(itempath)

        # Pick the endpoint entry once rather than indexing the config twice.
        endpoint_entry = self.config.endpoints['Endpoints'][endpoint_index]
        endpoint = "http://" + endpoint_entry['IP'] + ":" + endpoint_entry['Port']
        events.add_log("Processing with: " + endpoint)

        meta_service.set_f2f_plugin_version(itempath, API_VERSION)
        meta_service.set_f2f_plugin_git_commit(itempath, self.git_commit())

        try:
            file_processing = File_Processing(events, self.events_elastic,
                                              self.report_elastic,
                                              self.analysis_elastic,
                                              meta_service)
            if not file_processing.processDirectory(endpoint, itempath):
                events.add_log("CANNOT be processed")
                return False

            log_info('ProcessDirectoryWithEndpoint',
                     data={'file'     : original_file_path,
                           'status'   : FileStatus.COMPLETED,
                           'error'    : 'none',
                           'timestamp': datetime.now()})
            meta_service.set_error(itempath, "none")
            meta_service.set_status(itempath, FileStatus.COMPLETED)
            self.hash_json.update_status(file_hash, FileStatus.COMPLETED)
            events.add_log("Has been processed")
            return True
        except Exception as error:
            log_error(message='error in ProcessDirectoryWithEndpoint',
                      data={'file'  : original_file_path,
                            'status': FileStatus.FAILED,
                            'error' : str(error)})
            meta_service.set_error(itempath, str(error))
            meta_service.set_status(itempath, FileStatus.FAILED)
            self.hash_json.update_status(file_hash, FileStatus.FAILED)
            events.add_log("ERROR:" + str(error))
            return False
 def setUp(self) -> None:
     """Per-test fixture: a Metadata_Service plus one sample image file."""
     self.metadata_service = Metadata_Service()
     sample_image          = Test_Data().images().pop()
     self.test_file        = sample_image
     assert file_exists(sample_image)
class test_Metadata_Service(TestCase):
    """Tests for Metadata_Service metadata creation and hashing."""

    def setUp(self) -> None:
        self.metadata_service = Metadata_Service()
        self.test_file = Test_Data().images().pop()
        assert file_exists(self.test_file)

    def test_create_metadata(self):
        """create_metadata populates every expected field for a file."""
        metadata = self.metadata_service.create_metadata(self.test_file)
        metadata.delete()
        metadata.add_file(self.test_file)
        assert metadata.data == {
            'file_name'                      : file_name(self.test_file),
            'xml_report_status'              : None,
            'last_update_time'               : metadata.get_last_update_time(),
            'rebuild_server'                 : None,
            'server_version'                 : None,
            'error'                          : None,
            'original_file_paths'            : [self.test_file],
            'original_hash'                  : file_sha256(self.test_file),
            'original_hash_calculation_time' : metadata.data.get('original_hash_calculation_time'),
            'original_file_extension'        : '.jpg',
            'original_file_size'             : 97610,
            'rebuild_file_path'              : None,
            'rebuild_hash'                   : None,
            'rebuild_status'                 : FileStatus.INITIAL,
            'rebuild_file_extension'         : None,
            'rebuild_file_size'              : None,
            'rebuild_file_duration'          : None,
            'f2f_plugin_version'             : None,
            'f2f_plugin_git_commit'          : None,
            'hd1_to_hd2_copy_time'           : None,
            'hd2_to_hd3_copy_time'           : None,
        }
        assert metadata.delete() is True

    def test_file_hash(self):
        """file_hash matches an independent sha256 of the same file."""
        # FIX: local was named `hash`, shadowing the builtin.
        computed_hash = self.metadata_service.file_hash(self.test_file)
        assert computed_hash == file_sha256(self.test_file)

    def test_file_hash_metadata(self):
        pass  # TODO: placeholder — no assertions implemented yet
Example #11
0
class Pre_Processor:
    def __init__(self):
        """Wire up the collaborators used by the pre-processing pipeline."""
        self.config = Config()
        self.meta_service = Metadata_Service()
        self.storage = Storage()
        self.file_name = None  # set in process() method
        self.current_path = None
        self.base_folder = None
        self.dst_folder = None
        self.dst_file_name = None

        # BUG FIX: Status() was instantiated twice and the first instance
        # immediately discarded; keep a single instance and reset it so the
        # counters start from a clean slate.
        self.status = Status()
        self.status.reset()

    @log_duration
    def clear_data_and_status_folders(self):
        """Wipe and recreate the hd2 data, status and processed folders,
        then reset the status counters."""
        # todo: refactor this clean up to the storage class
        targets = (self.storage.hd2_data(),
                   self.storage.hd2_status(),
                   self.storage.hd2_processed())
        for target in targets:       # delete everything first ...
            folder_delete_all(target)
        for target in targets:       # ... then recreate the empty folders
            folder_create(target)
        self.status.reset()

    def file_hash(self, file_path):
        """Delegate hash calculation for *file_path* to the metadata service."""
        computed_hash = self.meta_service.file_hash(file_path)
        return computed_hash

    def prepare_folder(self, folder_to_process):
        """Ensure *folder_to_process* lives under hd1, copying it in if needed.

        Returns the folder's path inside hd1. A pre-existing copy is wiped
        before copying. The copy itself is best-effort: ordinary failures
        are swallowed and the target path returned anyway, matching the
        original behaviour.
        """
        if folder_to_process.startswith(self.storage.hd1()):
            return folder_to_process

        dirname = os.path.join(self.storage.hd1(),
                               os.path.basename(folder_to_process))
        if os.path.isdir(dirname):
            folder_delete_all(dirname)
        try:
            folder_copy(folder_to_process, dirname)
        except Exception:
            # BUG FIX: the original used `finally: return dirname`, which
            # silently discarded EVERY exception, including KeyboardInterrupt
            # and SystemExit. Keep the deliberate best-effort swallow for
            # ordinary exceptions only.
            pass
        return dirname

    def process_folder(self, folder_to_process):
        """Copy a folder into hd1 (if needed) and process every file in it.

        Publishes the total file count to the status tracker before any
        processing starts (so progress counters have a stable denominator),
        then processes each file. Returns False when the path is not a
        folder, True otherwise.
        """
        if not os.path.isdir(folder_to_process):
            # todo: add an event log
            return False

        folder_to_process = self.prepare_folder(folder_to_process)

        # IMPROVEMENT: the original walked the tree twice (once to count,
        # once to process), doubling the I/O and risking the count and the
        # processed set disagreeing if the folder changed in between. Take
        # one snapshot and use it for both.
        file_paths = [os.path.join(folder_name, filename)
                      for folder_name, _subfolders, filenames in os.walk(folder_to_process)
                      for filename in filenames
                      if os.path.isfile(os.path.join(folder_name, filename))]

        self.status.set_files_count(len(file_paths))

        for file_path in file_paths:
            self.process(file_path)

        return True

    @log_duration
    def process_files(self):
        # Phase-1 pre-processing run: start the background status thread,
        # process everything under hd1, then advance the tracker to phase 2
        # and stop the thread. The call order is the contract here.
        self.status.StartStatusThread()
        self.status.set_phase_1()
        self.process_folder(self.storage.hd1())
        self.status.set_phase_2()
        self.status.StopStatusThread()

    @log_duration
    def process(self, file_path):
        tik = datetime.now()

        metadata = self.meta_service.create_metadata(file_path=file_path)
        file_name = metadata.get_file_name()
        original_hash = metadata.get_original_hash()
        status = metadata.get_rebuild_status()
        self.update_status(file_name, original_hash, status)

        tok = datetime.now()
        delta = tok - tik

        if metadata.is_in_todo():
            hash_folder_path = self.storage.hd2_data(original_hash)
            self.meta_service.set_hd1_to_hd2_copy_time(hash_folder_path,
                                                       delta.total_seconds())
        else:
            self.status.set_not_copied()

    def update_status(self, file_name, original_hash, status):
        if status == FileStatus.INITIAL:
            self.status.add_file()