def setup_logger(logger_name: str, log_file: Path) -> None:
    """
    Sets up a logger with the provided name and log output. The logger writes to a rotating log file and to
    the console, both with the same '<time> : <message>' format.
    The level is set in 'wades_config.py'

    :raises TypeError if logger_name is not of type 'str', or if log_file is not of type 'pathlib.Path'.
    :param logger_name: The name of the new logger. It should be the class name.
    :type logger_name: str
    :param log_file: The file where the logs should be outputted.
    :type log_file: pathlib.Path
    """
    if not isinstance(logger_name, str):
        raise TypeError(expected_type_but_received_message.format("logger_name", "str", logger_name))
    if not isinstance(log_file, Path):
        raise TypeError(expected_type_but_received_message.format("log_file", "pathlib.Path", log_file))

    # Create the directory that will contain the log file. Calling mkdir on log_file itself would create a
    # directory at the log file's path and the file handler below could never open it as a file.
    if not log_file.parent.exists():
        log_file.parent.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger(logger_name)
    formatter = logging.Formatter('%(asctime)s : %(message)s')

    file_handler = RotatingFileHandler(filename=log_file, mode='a+', maxBytes=log_file_max_size_bytes,
                                       backupCount=max_number_rotating_log_files)
    file_handler.setFormatter(formatter)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)

    logger.setLevel(logging_level)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
def __add_processes_to_application_profile_and_save(
        self, application_name: str,
        application_processes: List[dict]) -> None:
    """
    Records every process of an application in the application's profile and persists the profile.
    The saved profile is loaded when there is one; otherwise a new profile is created.

    :raises TypeError if application_name is not of type 'str' or application_processes is not of type
            'List[dict]'.
    :raises ValueError if at least one of the processes' names is not equal to the application_name provided.
    :param application_name: The name of the application.
    :type application_name: str
    :param application_processes: The list of processes associated to the application.
    :type application_processes: List[dict]
    """
    if not isinstance(application_name, str):
        raise TypeError(
            expected_type_but_received_message.format("application_name", 'str', application_name))
    if not isinstance(application_processes, list):
        raise TypeError(
            expected_type_but_received_message.format(
                "application_processes", 'List[dict]', application_processes))

    profile = AppProfileDataManager.get_saved_profile(application_name)
    if profile is None:
        profile = AppProfile(application_name=application_name)

    for process_info in application_processes:
        if not isinstance(process_info, dict):
            raise TypeError(
                expected_type_but_received_message.format(
                    "application_processes", 'List[dict]', application_processes))

        name_of_process = process_info[ProcessAttribute.name.name]
        if name_of_process != application_name:
            raise ValueError(expected_application_message.format(application_name, name_of_process))

        resident_memory = process_info[ProcessAttribute.memory_info.name].rss
        children = process_info[ProcessAttribute.children_count.name]

        # A process without a known user contributes no user at all.
        username = process_info[ProcessAttribute.username.name]
        process_users = list() if username is None else [username]

        # The open files entry may be absent or explicitly None; both mean "no open files".
        files = process_info.get(ProcessAttribute.open_files.name, list())
        if files is None:
            files = list()

        cpu_usage = process_info[ProcessAttribute.cpu_percent.name]
        threads = process_info[ProcessAttribute.num_threads.name]
        connections = process_info[ProcessAttribute.connections.name]

        profile.add_new_information(memory_usage=resident_memory,
                                    child_processes_count=children,
                                    users=process_users,
                                    open_files=files,
                                    cpu_percentage=cpu_usage,
                                    data_retrieval_timestamp=self.__latest_retrieval_time,
                                    threads_number=threads,
                                    connections_num=connections)

    AppProfileDataManager.save_app_profile(profile)
def __detect_anomalies_in_non_numeric_attribute_with_whitelisting(normalized_attribute_data: List[str],
                                                                  last_retrieved_attribute_data: List[str]) -> \
        Tuple[bool, RiskLevel, Set[str]]:
    """
    Detects anomalies with a whitelist: every value of the latest retrieval that does not appear in the
    normalized data is reported as anomalous.

    :raises TypeError if normalized_attribute_data or last_retrieved_attribute_data are not of type 'List[str]'.
    :param normalized_attribute_data: The normalized attribute data.
    :type normalized_attribute_data: List[str]
    :param last_retrieved_attribute_data: The latest retrieved data.
    :type last_retrieved_attribute_data: List[str]
    :return: The results of the anomaly detection through whitelisting.
            The result is in the following format: (anomaly_found, risk_level, anomalous_values)
                anomaly_found: Flag for if an anomaly has been found.
                risk_level: The risk level associated to the anomaly. It is medium when an anomaly is found
                            and none otherwise.
                anomalous_values: The latest values that are missing from the normalized data.
            When the normalized data holds fewer entries than
            'wades_config.minimum_retrieval_size_for_modelling' no detection is done and
            (False, RiskLevel.none, set()) is returned.
    :rtype: Tuple[bool, RiskLevel, Set[str]]
    """
    # Input Validation
    for argument_name, argument_value in (("normalized_attribute_data", normalized_attribute_data),
                                          ("last_retrieved_attribute_data", last_retrieved_attribute_data)):
        if not isinstance(argument_value, list):
            raise TypeError(
                expected_type_but_received_message.format(argument_name, "List[str]", argument_value))

    # Not enough history to decide what is normal.
    if len(normalized_attribute_data) < wades_config.minimum_retrieval_size_for_modelling:
        return False, RiskLevel.none, set()

    unseen_values = set(last_retrieved_attribute_data).difference(normalized_attribute_data)
    if len(unseen_values) > 0:
        return True, RiskLevel.medium, unseen_values
    return False, RiskLevel.none, unseen_values
def save_abnormal_apps(abnormal_apps: List[AppSummary],
                       abnormal_apps_file_path: Path = __default_abnormal_apps_file) -> None:
    """
    Appends the abnormal applications to a csv file. The header row is only written when the file does not
    exist yet. Nothing is written when the list is empty.

    :raises TypeError if abnormal apps is not of type 'List[AppSummary]',
            or if abnormal_apps_file_path is not of type 'pathlib.Path'
    :param abnormal_apps: The list of abnormal apps to store.
    :type abnormal_apps: List[AppSummary]
    :param abnormal_apps_file_path: The file path to save the abnormal apps.
            It defaults to 'paths.APP_ANOM_FILE_PATH'.
    :type abnormal_apps_file_path: pathlib.Path
    """
    if not isinstance(abnormal_apps, list):
        raise TypeError(
            expected_type_but_received_message.format("abnormal_apps", "List[AppSummary]", abnormal_apps))
    if not isinstance(abnormal_apps_file_path, Path):
        raise TypeError(
            expected_type_but_received_message.format("abnormal_apps_file_path", "pathlib.Path",
                                                      abnormal_apps_file_path))
    if len(abnormal_apps) <= 0:
        return

    timestamp_column = AppProfileAttribute.data_retrieval_timestamps.name

    # Every summary attribute becomes a column, except the two detail dictionaries; the retrieval
    # timestamp is stored as the last column.
    skipped_columns = (AppSummaryAttribute.modelled_app_details.name,
                       AppSummaryAttribute.latest_retrieved_app_details.name)
    columns = [attribute.name for attribute in AppSummaryAttribute if attribute.name not in skipped_columns]
    columns.append(timestamp_column)

    rows = list()
    for summary in abnormal_apps:
        if not isinstance(summary, AppSummary):
            raise TypeError(
                expected_type_but_received_message.format("abnormal_apps", "List[AppSummary]", summary))
        latest_details = summary.get_latest_retrieved_app_details()
        timestamp = latest_details[timestamp_column][0]
        row = json.loads(str(summary))
        row[timestamp_column] = timestamp
        rows.append(row)

    write_header = not abnormal_apps_file_path.exists()
    pandas.DataFrame(rows, columns=columns).to_csv(abnormal_apps_file_path, index=False, mode='a+',
                                                   header=write_header)
def __init__(self, min_number_count_non_anomalous: int = 5) -> None:
    """
    Creates the technique and stores how many data points must share a bin with an 'anomalous' point for it
    not to be considered a high risk anomaly.

    :raises TypeError if min_number_count_non_anomalous is not of type 'int'.
    :raises ValueError if min_number_count_non_anomalous is less than 0.
    :param min_number_count_non_anomalous: The minimum number of data points in the same bin as the
            'anomalous' point so as to not consider it a high risk anomaly.
    :type min_number_count_non_anomalous: int
    """
    received = min_number_count_non_anomalous
    if not isinstance(received, int):
        raise TypeError(
            expected_type_but_received_message.format("min_number_count_non_anomalous", "int", received))
    if received < 0:
        raise ValueError(anomaly_range_percent_not_in_range.format(0, received))

    # Minimum number of data points expected in the bin of a detected 'anomalous' point.
    # It controls how strict the technique is when new data is modelled.
    self.__min_count_non_anomalous = received
def __call__(self, data: List[AppProfile]) -> List[AppSummary]:
    """
    Models every AppProfile of the list, keeping the input order.

    :raises TypeError if data is not of type List[AppProfile].
    :param data: The list of AppProfiles to model.
    :type data: List[AppProfile]
    :return: A list of modelled AppProfiles in the form of AppSummary objects.
    :rtype: List[AppSummary]
    """
    if not isinstance(data, list):
        raise TypeError(expected_type_but_received_message.format("data", " List[AppProfile]", data))

    return [self.__frequency_modelling_app(app_profile=profile) for profile in data]
def send_request(request: str) -> Any:
    """
    Opens a connection to the modeller daemon, sends a request and reads the response until the daemon
    closes the connection.

    :raises TypeError if request is not of type 'str'.
    :param request: The message to send to the modeller.
    :type request: str
    :return: The JSON-decoded response from the modeller, or an explanatory message (str) when the
            modeller refuses the connection.
    :rtype: Any
    """
    if not isinstance(request, str):
        raise TypeError(expected_type_but_received_message.format('request', 'str', request))

    # The context manager closes the socket on every path, including a refused connection
    # and errors raised while sending or receiving.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        try:
            client.connect((wades_config.localhost_address, wades_config.modeller_thread_port))
        except ConnectionRefusedError:
            return "Modeller service is not accepting requests. Check configuration file and " \
                   "change run_modeller_server value to True."

        client.sendall(request.encode())

        # An empty read means the modeller closed its side: the response is complete.
        chunks = []
        while True:
            chunk = client.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)

    return json.loads(b''.join(chunks))
def __get_app_profile_file_path(app_profile_name: str, base_path: Path = __path_to_use) -> Path:
    """
    Gets the App Profile file path. The file name is the index of the application name in the mapping file
    ('wades_config.app_profile_file_names_map'). An application name that is not in the mapping yet is
    added to it and the mapping file is rewritten.

    :raises TypeError if app_profile_name is not of type 'str', or if base_path is not of type
            'pathlib.Path'.
    :param app_profile_name: The name of the app profile.
    :type app_profile_name: str
    :param base_path: The base path to save the application profile.
            It defaults to values paths.APP_PROF_DATA_DIR_PATH if is not running as a test
            and to paths.TEST_APP_PROF_DATA_DIR_PATH if it is.
    :type base_path: pathlib.Path
    :return: The file path of the App profile.
    :rtype: pathlib.Path
    """
    if not isinstance(app_profile_name, str):
        raise TypeError(expected_type_but_received_message.format("app_profile_name", "str", app_profile_name))
    if not isinstance(base_path, Path):
        raise TypeError(expected_type_but_received_message.format("base_path", "pathlib.Path", base_path))

    app_name_column = AppProfileAttribute.app_name.name
    mapping_path = base_path / wades_config.app_profile_file_names_map
    data_frame = AppProfileDataManager.__get_saved_app_profiles_file_names_mapping_dataframe(base_path)

    matching_index = data_frame.index[data_frame[app_name_column] == app_profile_name].tolist()
    if len(matching_index) == 0:
        # DataFrame.append was removed in pandas 2.0; concat with ignore_index=True adds the row the
        # same way and renumbers the index.
        new_row = pandas.DataFrame([{app_name_column: app_profile_name}])
        data_frame = pandas.concat([data_frame, new_row], ignore_index=True)
        data_frame.to_csv(mapping_path)
        matching_index = data_frame.index[data_frame[app_name_column] == app_profile_name].tolist()

    index = matching_index[0]
    file_name = f"{index}.csv"
    return base_path / file_name
def __build_dict_frequency(data: List[Union[int, float]]) -> RangeKeyDict:
    """
    Builds the frequency model of the data: a dictionary that maps every histogram bin, as a
    (lower_edge, upper_edge) key, to the number of data points in that bin.
    The bins are computed with the Freedman–Diaconis rule.

    :raises TypeError if data is not of type 'List[Union[int, float]]'.
    :param data: The data to model.
    :type data: List[Union[int, float]]
    :return: The frequency model as a dictionary.
    :rtype Dict[range, int]
    """
    if not isinstance(data, list):
        raise TypeError(expected_type_but_received_message.format("data", "List[Union[int, float]]", data))

    model = RangeKeyDict()
    counts, edges = numpy.histogram(data, bins='fd')
    if len(edges) == 0:
        return model

    # Consecutive edges delimit one bin: (edges[i], edges[i + 1]) holds counts[i] data points.
    for lower_edge, upper_edge, count in zip(edges[:-1], edges[1:], counts):
        model[(lower_edge, upper_edge)] = count
    return model
def __frequency_modelling_app(self, app_profile: AppProfile) -> AppSummary:
    """
    Runs the anomaly detection for one AppProfile and wraps the outcome in an AppSummary.
    When 'wades_config.is_modelling' is set, the latest retrieved data is compared against the
    previously retrieved data for:
        - the numeric attributes: memory_infos, cpu_percents, children_counts, threads_numbers and
          connections_numbers.
        - the non-numeric attributes handled by '__detect_anomalies_in_non_numeric_attributes'.
    Otherwise no detection is done and the summary reports no error message, risk level 'none' and no
    anomalous attributes.

    :raises TypeError if app_profile is not an instance of 'AppProfile'.
    :param app_profile: The application to model.
    :type app_profile: AppProfile
    :return: the modelled application as an AppSummary instance.
    :rtype: AppSummary
    """
    if not isinstance(app_profile, AppProfile):
        raise TypeError(
            expected_type_but_received_message.format(
                "app_profile",
                "AppProfile",
                app_profile
            )
        )

    numeric_attribute_names = {AppProfileAttribute.memory_infos.name, AppProfileAttribute.cpu_percents.name,
                               AppProfileAttribute.children_counts.name,
                               AppProfileAttribute.threads_numbers.name,
                               AppProfileAttribute.connections_numbers.name}

    # Defaults reported when modelling is disabled.
    error_message = None
    max_risk_level = RiskLevel.none
    anomalous_attrs = set()

    normalized_app_profile_data = app_profile.get_previously_retrieved_data()
    latest_app_profile_data = app_profile.get_latest_retrieved_data()

    if wades_config.is_modelling:
        # Numeric data
        is_anomalous_numeric, numeric_max_risk_level, anomalous_attrs = \
            self.__detect_anomalies_in_numeric_attributes(normal_app_profile_data=normalized_app_profile_data,
                                                          latest_app_profile_data=latest_app_profile_data,
                                                          numeric_attribute_names=numeric_attribute_names)

        # Non-numeric data
        is_anomalous_non_numeric, non_numeric_max_risk_level, non_numeric_anomalous_attrs = \
            FrequencyTechnique.__detect_anomalies_in_non_numeric_attributes(
                normalized_app_profile_data=normalized_app_profile_data,
                latest_app_profile_data=latest_app_profile_data)

        # Prepare data to convert it into an AppSummary object.
        # The summary carries the highest risk and the union of the anomalous attribute names.
        max_risk_level = max(numeric_max_risk_level, non_numeric_max_risk_level)
        anomalous_attrs.update(non_numeric_anomalous_attrs)

        if is_anomalous_non_numeric or is_anomalous_numeric:
            error_message = wades_config.anomaly_detected_message

    # NOTE(review): modelled_app_details receives the latest retrieved data, not
    # normalized_app_profile_data - confirm this is intended.
    app_summary = AppSummary(app_name=app_profile.get_application_name(), error_message=error_message,
                             risk=max_risk_level, abnormal_attrs=anomalous_attrs,
                             latest_retrieved_app_details=latest_app_profile_data,
                             modelled_app_details=latest_app_profile_data)
    return app_summary
def add_new_information_from_process_object(self, process: psutil.Process,
                                            data_retrieval_timestamp: datetime.datetime) -> None:
    """
    Adds the new information about the process to the application profile.
    This should be mainly used for applications with only one process.
    The call blocks for about 0.1 seconds so that the cpu usage can be sampled.

    :raises TypeError if process is not of type psutil.Process or data_retrieval_timestamp is not of type
            datetime.datetime.
    :raises ValueError if data_retrieval_timestamp is newer than current time.
    :param process: Information about the specific process.
    :type process: psutil.Process
    :param data_retrieval_timestamp: The time the data was retrieved.
    :type data_retrieval_timestamp: datetime.datetime
    """
    if not (isinstance(process, psutil.Process)):
        raise TypeError(expected_type_but_received_message.format("process", "psutil.Process", process))
    if not (isinstance(data_retrieval_timestamp, datetime.datetime)):
        raise TypeError(expected_type_but_received_message.format("data_retrieval_timestamp",
                                                                  "datetime.datetime",
                                                                  data_retrieval_timestamp))
    # The timezone is dropped so the timestamp can be compared with the naive current time.
    if data_retrieval_timestamp.replace(tzinfo=None) > datetime.datetime.now():
        raise ValueError("Argument data_retrieval_timestamp cannot be newer than current time. Value receive: {}"
                         .format(data_retrieval_timestamp))

    # Get info from the process object. One of the following calls may raise an Error (OS, AccessDenied, etc).
    open_files = process.open_files()
    memory_info = process.memory_info()
    child_process_count = len(process.children())
    username = process.username()
    threads_number = process.num_threads()

    # First call only starts the cpu measurement; the value is read after the sleep below.
    process.cpu_percent()
    try:
        connections_num = len(process.connections())
    except psutil.AccessDenied:
        # Connections are recorded as 0 when they cannot be read.
        connections_num = 0

    time.sleep(0.1)  # wait for cpu_percent to return a meaningful value.
    cpu_percentage = process.cpu_percent()

    self.add_open_files(open_files=open_files, data_retrieval_timestamp=data_retrieval_timestamp)
    self.__memory_usages.append(memory_info.rss)
    self.__data_retrieval_timestamp.append(data_retrieval_timestamp)
    self.__child_processes_count.append(child_process_count)
    # username is a single str: append it. extend() would add every character as a separate user.
    self.__users.append(username)
    self.__cpu_percent_usages.append(cpu_percentage)
    self.__threads_numbers.append(threads_number)
    self.__connections_numbers.append(connections_num)
def get_saved_abnormal_apps(abnormal_apps_file_path: Path = __default_abnormal_apps_file) \
        -> Dict[str, List[Dict[str, Union[str, list]]]]:
    """
    Retrieves the saved abnormal apps from a csv file, grouped by application name.
    The file is read in chunks of 'app_profile_retrieval_chunk_size' rows.

    :raises TypeError if abnormal_apps_file_path is not of type 'pathlib.Path'
    :param abnormal_apps_file_path: The file path where the abnormal apps are saved.
            It defaults to 'paths.APP_ANOM_FILE_PATH'.
    :type abnormal_apps_file_path: pathlib.Path
    :return: The saved abnormal app profiles as dictionaries of application names and
            list of the abnormal values. If the file doesn't exist it returns an empty dictionary.
        Format: {
                    "app_name": [
                        {
                            "error_message": "Some error message",
                            "risk": "high",
                            "abnormal_attributes": [],
                            "data_retrieval_timestamps": "2021-01-31 20:09:03:771116"
                        }, ...
                    ], ...
                }
    :rtype: Dict[str, List[Dict[str, Union[str, list]]]]
    """
    if not isinstance(abnormal_apps_file_path, Path):
        raise TypeError(
            expected_type_but_received_message.format(
                "abnormal_apps_file_path",
                "pathlib.Path",
                abnormal_apps_file_path
            )
        )

    app_name_column = AppSummaryAttribute.app_name.name
    abnormal_attributes_column = AppSummaryAttribute.abnormal_attributes.name

    abnormal_apps_dict = dict()
    try:
        for batch in pandas.read_csv(abnormal_apps_file_path, chunksize=app_profile_retrieval_chunk_size):
            for record in batch.to_dict("records"):
                # The application name becomes the key, so it is removed from the record itself.
                app_name = record.pop(app_name_column)
                # The abnormal attributes are stored as the text of a python literal.
                record[abnormal_attributes_column] = ast.literal_eval(record[abnormal_attributes_column])
                abnormal_apps_dict.setdefault(app_name, list()).append(record)
    except FileNotFoundError:
        # No abnormal apps have been saved yet: return what was collected (an empty dictionary).
        pass

    return abnormal_apps_dict
def add_open_files(self, open_files: list, data_retrieval_timestamp: datetime.datetime) -> None:
    """
    Records the files opened at one retrieval: the distinct 'path' values of the given open files are
    stored as one new entry of this application's open files.

    :raises TypeError if open_files is not of type list, or if data_retrieval_timestamp is not of type
            datetime.datetime.
    :param open_files: The open files to add. Every element must expose a 'path' attribute.
    :type open_files: list
    :param data_retrieval_timestamp: The time the data was retrieved. It is only validated.
    :type data_retrieval_timestamp: datetime.datetime
    """
    if not isinstance(open_files, list):
        raise TypeError(expected_type_but_received_message.format("open_files", "list", open_files))
    if not isinstance(data_retrieval_timestamp, datetime.datetime):
        raise TypeError(expected_type_but_received_message.format("data_retrieval_timestamp",
                                                                  "datetime.datetime",
                                                                  data_retrieval_timestamp))

    # A set drops the paths that are reported more than once.
    distinct_paths = set()
    for entry in open_files:
        distinct_paths.add(entry.path)
    self.__open_files.append(list(distinct_paths))
def save_app_profiles(app_profiles: List[AppProfile], retrieval_timestamp: datetime,
                      app_profile_base_dir: Path = __path_to_use,
                      retrieval_timestamp_file_path: Path = __default_retrieval_timestamp_file) -> None:
    """
    Saves every AppProfile under app_profile_base_dir and then saves the retrieval timestamp.
    All arguments, including every element of app_profiles, are validated before anything is written.

    :raises TypeError if app_profiles is not of type 'List[AppProfile]',
            or if app_profile_base_dir or retrieval_timestamp_file_path are not of type 'pathlib.Path',
            or if retrieval_timestamp is not of type 'datetime'.
    :param app_profiles: The list of AppProfiles to save.
    :type app_profiles: List[AppProfile]
    :param retrieval_timestamp: The latest retrieval timestamp of the AppProfile objects.
    :type retrieval_timestamp: datetime
    :param app_profile_base_dir: The base directory of app profiles files.
    :type app_profile_base_dir: pathlib.Path
    :param retrieval_timestamp_file_path: The path of the file to save the retrieval timestamp.
    :type retrieval_timestamp_file_path: pathlib.Path
    """
    if not isinstance(app_profiles, list):
        raise TypeError(expected_type_but_received_message.format("app_profiles", "List[AppProfile]",
                                                                  app_profiles))
    if not isinstance(app_profile_base_dir, Path):
        raise TypeError(expected_type_but_received_message.format("app_profile_base_dir", "pathlib.Path",
                                                                  app_profile_base_dir))
    if not isinstance(retrieval_timestamp, datetime):
        raise TypeError(expected_type_but_received_message.format("retrieval_timestamp", "datetime",
                                                                  retrieval_timestamp))
    if not isinstance(retrieval_timestamp_file_path, Path):
        raise TypeError(expected_type_but_received_message.format("retrieval_timestamp_file_path",
                                                                  "pathlib.Path",
                                                                  retrieval_timestamp_file_path))

    # Check the elements before saving, so an invalid one cannot leave a partially saved batch.
    for app_profile in app_profiles:
        if not isinstance(app_profile, AppProfile):
            raise TypeError(
                expected_type_but_received_message.format("app_profiles", "List[AppProfile]", app_profiles))

    for app_profile in app_profiles:
        AppProfileDataManager.save_app_profile(app_profile, app_profile_base_dir)

    AppProfileDataManager.save_last_retrieved_data_timestamp(retrieval_timestamp, retrieval_timestamp_file_path)
def save_app_profile(app_profile: AppProfile, base_path: Path = __path_to_use) -> None:
    """
    Writes an application profile as a one-row csv file inside the base directory. The file of the
    application is resolved through '__get_app_profile_file_path' and is overwritten.

    :raises TypeError if app_profile is not of type 'AppProfile', or if base_path is not of type
            'pathlib.Path'.
    :param app_profile: The application profile to save.
    :type app_profile: AppProfile
    :param base_path: The base path to save the application profile.
            It defaults to values paths.APP_PROF_DATA_DIR_PATH if is not running as a test
            and to paths.TEST_APP_PROF_DATA_DIR_PATH if it is.
    :type base_path: pathlib.Path
    """
    if not isinstance(app_profile, AppProfile):
        raise TypeError(expected_type_but_received_message.format("app_profile", "AppProfile", app_profile))
    if not isinstance(base_path, Path):
        raise TypeError(expected_type_but_received_message.format("base_path", "pathlib.Path", base_path))

    profile_name = app_profile.get_application_name()
    destination = AppProfileDataManager.__get_app_profile_file_path(app_profile_name=profile_name,
                                                                    base_path=base_path)

    single_row = [app_profile.dict_format()]
    pandas.DataFrame(single_row, columns=AppProfileDataManager.__column_names).to_csv(destination, index=False)
def save_last_retrieved_data_timestamp(retrieval_timestamp: datetime,
                                       retrieval_timestamp_file_path: Path = __default_retrieval_timestamp_file) \
        -> None:
    """
    Saves the last retrieved timestamp, formatted with 'datetime_format', in the specified file.
    The previous content of the file is overwritten.

    :raises TypeError if retrieval_timestamp is not of type 'datetime',
            or if retrieval_timestamp_file_path is not of type 'pathlib.Path'.
    :param retrieval_timestamp: The retrieved timestamp to save.
    :type retrieval_timestamp: datetime
    :param retrieval_timestamp_file_path: The path of the file where the retrieval timestamp is saved.
    :type retrieval_timestamp_file_path: pathlib.Path
    """
    if not isinstance(retrieval_timestamp_file_path, Path):
        raise TypeError(expected_type_but_received_message.format("retrieval_timestamp_file_path",
                                                                  "pathlib.Path",
                                                                  retrieval_timestamp_file_path))
    if not isinstance(retrieval_timestamp, datetime):
        raise TypeError(expected_type_but_received_message.format("retrieval_timestamp", "datetime",
                                                                  retrieval_timestamp))

    # Write to the requested file: the default file is only used when the caller does not pass a path.
    with open(retrieval_timestamp_file_path, "w") as file:
        file.write(retrieval_timestamp.strftime(datetime_format))
def set_modeller_service_flag(self, run_modeller_server_new_value: bool) -> None:
    """
    Stores the flag that controls whether the modeller server is run.
    If True, the server is executed and information can be sent to display.
    If False, only the ps_handler and modeller are executed.

    :raises TypeError if run_modeller_server_new_value is not of type 'bool'.
    :param run_modeller_server_new_value: The new value of the flag to run the modeller server.
    :type run_modeller_server_new_value: bool
    """
    if isinstance(run_modeller_server_new_value, bool):
        self.__run_server = run_modeller_server_new_value
        return

    raise TypeError(
        expected_type_but_received_message.format("run_modeller_server_new_value", "bool",
                                                  run_modeller_server_new_value))
def __get_saved_app_profiles_file_names_mapping_dataframe(base_path: Path = __path_to_use) -> DataFrame:
    """
    Reads the mapping file ('wades_config.app_profile_file_names_map' inside base_path) that lists the
    saved application names; the index of a name identifies its file. An empty mapping file is created
    first when there is none.

    :raises TypeError if base_path is not of type 'pathlib.Path'.
    :param base_path: The base path where to retrieve the application profile names.
            It defaults to values paths.APP_PROF_DATA_DIR_PATH if is not running as a test
            and to paths.TEST_APP_PROF_DATA_DIR_PATH if it is.
    :type base_path: pathlib.Path
    :return: The dataframe containing the list of saved application names and their respective file names.
    :rtype: pandas.DataFrame
    """
    if not isinstance(base_path, Path):
        raise TypeError(expected_type_but_received_message.format("base_path", "pathlib.Path", base_path))

    mapping_file = base_path / wades_config.app_profile_file_names_map
    if not mapping_file.exists():
        # Bootstrap the mapping with the header only, so the read below always finds a file.
        empty_mapping = pandas.DataFrame(columns=[AppProfileAttribute.app_name.name])
        empty_mapping.to_csv(mapping_file, index=True)

    saved_mapping = pandas.read_csv(mapping_file, index_col=0)
    return saved_mapping
def print_method_execution_time_seconds(
        method_to_execute: Callable) -> None:
    """
    Runs the provided method through 'BenchmarkingUtils.get_method_execution_time_seconds' and prints how
    long it took, together with the method's name ('Unknown' when it has no '__name__').

    :raises TypeError: error is raised when method_to_execute is not of type 'Callable'
    :param method_to_execute: the method to execute.
    :type method_to_execute: Callable
    """
    name = getattr(method_to_execute, '__name__', 'Unknown')
    if not isinstance(method_to_execute, Callable):
        received_type = type(method_to_execute)
        raise TypeError(expected_type_but_received_message.format(name, 'Callable', received_type))

    seconds = BenchmarkingUtils.get_method_execution_time_seconds(method_to_execute=method_to_execute)
    message = f"It took {seconds} seconds to execute {name}"
    print(message)
def __init__(self, application_name: str) -> None:
    """
    Creates an empty Application Profile: it abstracts the past and current usages of an application.
    The creation time is recorded and every usage history starts empty.

    :raises TypeError if application_name is not of type 'str'.
    :param application_name: the name of the application.
    :type application_name: str
    """
    if not isinstance(application_name, str):
        raise TypeError(expected_type_but_received_message.format("application_name", "str", application_name))

    self.__object_creation_timestamp = datetime.datetime.now()
    self.__name = application_name

    # Usage histories.
    self.__memory_usages = []  # this is in bytes
    self.__cpu_percent_usages = []
    self.__open_files = []
    self.__data_retrieval_timestamp = []
    self.__child_processes_count = []
    self.__users = []
    self.__threads_numbers = []
    self.__connections_numbers = []
def get_method_execution_time_seconds(
        method_to_execute: Callable) -> float:
    """
    Get the execution time of a method in seconds. The method is called once, without arguments,
    and its return value is discarded.

    :raises TypeError: error is raised when method_to_execute is not of type 'Callable'
    :param method_to_execute: the method to execute.
    :type method_to_execute: Callable
    :return: the time of the executed method in seconds.
    :rtype: float
    """
    if not isinstance(method_to_execute, Callable):
        actual_method_to_execute_name = getattr(method_to_execute, '__name__', 'Unknown')
        raise TypeError(
            expected_type_but_received_message.format(
                actual_method_to_execute_name, 'Callable', type(method_to_execute)))

    # perf_counter is monotonic: unlike the wall clock, it cannot jump while the method runs
    # and never yields a negative duration.
    start_time = time.perf_counter()
    method_to_execute()
    return time.perf_counter() - start_time
def set_minimum_count_non_anomalous(self, new_value: int) -> None:
    """
    Replaces the minimum number of data points that must be in the same bin as an 'anomalous' point so as
    to not consider it a high risk anomaly.

    :raises TypeError if new_value is not of type 'int'.
    :raises ValueError if new_value is less than 0.
    :param new_value: The new value of min_count_non_anomalous.
    :type new_value: int
    """
    if not isinstance(new_value, int):
        wrong_type_message = expected_type_but_received_message.format("new_value", "int", new_value)
        raise TypeError(wrong_type_message)
    if new_value < 0:
        out_of_range_message = anomaly_range_percent_not_in_range.format(0, new_value)
        raise ValueError(out_of_range_message)

    self.__min_count_non_anomalous = new_value
def get_saved_profile_as_dict(app_profile_name: str, base_path: Path = __path_to_use) \
        -> Union[Dict[str, Any], None]:
    """
    Retrieves the saved profile of one application from its csv file inside base_path.
    Only the first saved row is used. The first two columns are kept as read from the file; the
    remaining ones are stored as the text of python literals and are evaluated back into objects.

    :raises TypeError if app_profile_name is not of type 'str', or if base_path is not of type 'pathlib.Path'.
    :param app_profile_name: The name of the application profile to retrieve.
    :type app_profile_name: str
    :param base_path: The base directory to find the application profile.
    :type base_path: Path
    :return: A dictionary of the saved column names to the values of the application profile, or None
            when the profile file does not exist or has no rows.
    :rtype: Union[Dict[str, Any], None]
    """
    if not isinstance(app_profile_name, str):
        # The expected type is passed as text, like in every other validation message.
        raise TypeError(expected_type_but_received_message.format("app_profile_name", "str", app_profile_name))

    app_profile_file_path = AppProfileDataManager.__get_app_profile_file_path(app_profile_name, base_path)

    try:
        values_raw = pandas.read_csv(app_profile_file_path)
    except FileNotFoundError:
        return None

    for app_profile_info_str_format in values_raw.values.tolist():
        app_profile_info = [
            attribute if position <= 1 else ast.literal_eval(attribute)
            for position, attribute in enumerate(app_profile_info_str_format)
        ]
        return dict(zip(AppProfileDataManager.__column_names, app_profile_info))

    return None
def __detect_anomalies_in_numeric_attributes(self, normal_app_profile_data: dict, latest_app_profile_data: dict,
                                             numeric_attribute_names: Set[str]) -> Tuple[bool, RiskLevel, Set[str]]:
    """
    Detects the anomalies for all numeric attributes.

    :raises TypeError if normal_app_profile_data or latest_app_profile_data are not of type 'dict',
            or if numeric_attribute_names is not of type 'Set[str]'.
    :param normal_app_profile_data: The normalized application profile data as a dictionary.
            For more info about the format:
            'src.main.common.AppProfile.AppProfile.get_previously_retrieved_data'
    :type normal_app_profile_data: dict
    :param latest_app_profile_data: The latest application profile data as a dictionary.
            For more info about the format:
            'src.main.common.AppProfile.AppProfile.get_latest_retrieved_data'
    :type latest_app_profile_data: dict
    :param numeric_attribute_names: The numeric attribute names. Every name must be a key of both
            dictionaries.
    :type numeric_attribute_names: Set[str]
    :return: A tuple (anomaly_found, max_risk_level, anomalous_attribute_names) with the values of the
            anomaly detection for all numeric attributes along with the maximum risk level found.
            With no attribute names it is (False, RiskLevel.none, set()).
    :rtype: Tuple[bool, RiskLevel, Set[str]]
    """
    # Input Validation
    if not isinstance(normal_app_profile_data, dict):
        raise TypeError(
            expected_type_but_received_message.format(
                "normal_app_profile_data",
                "dict",
                normal_app_profile_data
            )
        )
    if not isinstance(latest_app_profile_data, dict):
        raise TypeError(
            expected_type_but_received_message.format(
                "latest_app_profile_data",
                "dict",
                latest_app_profile_data
            )
        )
    if not isinstance(numeric_attribute_names, set):
        raise TypeError(
            expected_type_but_received_message.format(
                "numeric_attribute_names",
                "Set[str]",
                numeric_attribute_names
            )
        )

    risk_levels = set()
    anomalous_attrs = set()

    for numeric_attribute_name in numeric_attribute_names:
        normal_attribute_values = normal_app_profile_data[numeric_attribute_name]
        latest_attribute_values = latest_app_profile_data[numeric_attribute_name]
        anomaly_found, risk_level = self.__detect_anomalies_in_numeric_attribute(
            previous_attribute_data=normal_attribute_values, latest_attribute_data=latest_attribute_values)
        risk_levels.add(risk_level)
        if anomaly_found:
            anomalous_attrs.add(numeric_attribute_name)

    anomaly = len(anomalous_attrs) > 0
    # Without attributes there is no risk level to compare: max() of an empty set would raise ValueError.
    max_risk_level = max(risk_levels, default=RiskLevel.none)
    return anomaly, max_risk_level, anomalous_attrs
def __detect_anomalies_in_non_numeric_attributes(normalized_app_profile_data: dict, latest_app_profile_data: dict) \
        -> Tuple[bool, RiskLevel, Set[str]]:
    """
    Runs the anomaly detection over the non-numeric attributes of an application profile.
    At the moment only the 'usernames' and 'opened_files' attributes are inspected: usernames through
    whitelisting, opened files through both whitelisting and blacklisting.

    :raises TypeError if normalized_app_profile_data or latest_app_profile_data are not of type 'dict'.
    :param normalized_app_profile_data: The normalized application profile data as a dictionary.
            For more info about the format:
            'src.main.common.AppProfile.AppProfile.get_previously_retrieved_data'
    :type normalized_app_profile_data: dict
    :param latest_app_profile_data: The latest application profile data as a dictionary.
            For more info about the format:
            'src.main.common.AppProfile.AppProfile.get_latest_retrieved_data'
    :type latest_app_profile_data: dict
    :return: A tuple in the format (anomaly_found, max_risk_level, anomalous_attribute_names), where
            max_risk_level is the highest risk level reported by any of the individual checks.
    :rtype: Tuple[bool, RiskLevel, Set[str]]
    """
    # Input Validation (same order and messages for both dictionaries)
    for argument_name, argument_value in (("normalized_app_profile_data", normalized_app_profile_data),
                                          ("latest_app_profile_data", latest_app_profile_data)):
        if not isinstance(argument_value, dict):
            raise TypeError(
                expected_type_but_received_message.format(argument_name, "dict", argument_value))

    usernames_key = AppProfileAttribute.usernames.name
    opened_files_key = AppProfileAttribute.opened_files.name

    # Usernames: whitelisting only.
    is_user_attr_anomalous, user_attr_risk_level, _ = \
        FrequencyTechnique.__detect_anomalies_in_non_numeric_attribute_with_whitelisting(
            normalized_attribute_data=normalized_app_profile_data[usernames_key],
            last_retrieved_attribute_data=latest_app_profile_data[usernames_key])

    flagged_attribute_names = set()
    if is_user_attr_anomalous:
        flagged_attribute_names.add(usernames_key)

    # Opened files are stored as one collection per retrieval; flatten them into plain lists of paths.
    baseline_files = [path for files in normalized_app_profile_data[opened_files_key] for path in files]
    recent_files = [path for files in latest_app_profile_data[opened_files_key] for path in files]

    is_files_anomalous_whitelist, files_whitelist_risk_level, _ = \
        FrequencyTechnique.__detect_anomalies_in_non_numeric_attribute_with_whitelisting(
            normalized_attribute_data=baseline_files,
            last_retrieved_attribute_data=recent_files)

    is_files_anomalous_blacklist, files_blacklist_risk_level, _ = \
        FrequencyTechnique.__detect_anomalies_in_non_numeric_attribute_with_blacklisting(
            normalized_attribute_data=baseline_files,
            last_retrieved_attribute_data=recent_files,
            blacklisted_values=wades_config.prohibited_files)

    files_anomalous = is_files_anomalous_whitelist or is_files_anomalous_blacklist
    if files_anomalous:
        flagged_attribute_names.add(opened_files_key)

    highest_risk_level = max(files_blacklist_risk_level, files_whitelist_risk_level, user_attr_risk_level)

    return is_user_attr_anomalous or files_anomalous, highest_risk_level, flagged_attribute_names
def __detect_anomalies_in_numeric_attribute(self, previous_attribute_data: List[Union[int, float]],
                                            latest_attribute_data: List[Union[int, float]]) -> \
        Tuple[bool, RiskLevel]:
    """
    Detect anomalies in numeric data and then assigns it a risk level.
    Outliers are identified with the 1.5 * IQR rule computed over previous_attribute_data. The first point of
    latest_attribute_data that falls outside the fences decides the result; later points are not inspected.
    The risk level assigned depends on the following criteria:
    * A point above the upper fence starts at high risk; a point below the lower fence starts at medium risk.
    * If the recorded data already extends beyond the fence, the new point is still inside the recorded
      range, and it is closer to the fence than to the recorded extreme, the risk level is lowered by 1.
    * If the new data point belongs to a bin that has more than a specified number of points
      (min_count_non_anomalous), its risk level lowers by 1 (unless it is already in the low category).
    * Only if there are no anomalies found, the risk_level assigned is none.

    :raises TypeError if previous_attribute_data or latest_attribute_data are not of type
            'List[Union[int, float]]'.
    :param previous_attribute_data: The numeric data used to create the normalized model.
    :type previous_attribute_data: List[Union[int, float]]
    :param latest_attribute_data: The numeric data to investigate.
    :type latest_attribute_data: List[Union[int, float]]
    :return: A tuple in the format (anomaly_found, risk_level). When previous_attribute_data holds fewer
            entries than 'wades_config.minimum_retrieval_size_for_modelling', no model is built and
            (False, RiskLevel.none) is returned.
    :rtype: Tuple[bool, RiskLevel]
    """
    # Input Validation
    if not isinstance(previous_attribute_data, list):
        raise TypeError(
            expected_type_but_received_message.format(
                "previous_attribute_data", "List[Union[int, float]]", previous_attribute_data
            )
        )
    if not isinstance(latest_attribute_data, list):
        raise TypeError(
            expected_type_but_received_message.format(
                "latest_attribute_data", "List[Union[int, float]]", latest_attribute_data
            )
        )

    # Not enough history to build a meaningful model.
    if len(previous_attribute_data) < wades_config.minimum_retrieval_size_for_modelling:
        return False, RiskLevel.none

    attribute_model = FrequencyTechnique.__build_dict_frequency(data=previous_attribute_data)

    q1, q3 = numpy.percentile(previous_attribute_data, [25, 75])
    iqr = q3 - q1
    lower_outlier = q1 - (1.5 * iqr)
    upper_outlier = q3 + (1.5 * iqr)
    lowest_point = min(previous_attribute_data)
    highest_point = max(previous_attribute_data)

    for new_point in latest_attribute_data:
        # NOTE(review): the lookup is done for every point, including non-outliers; this assumes the frequency
        # model yields a count (or None) for values it has not recorded - confirm against __build_dict_frequency.
        bin_count = attribute_model[new_point]

        if new_point < lower_outlier:
            risk_level = RiskLevel.medium
            # A negative distance means the new point is lesser than the recorded lowest point.
            distance_to_recorded_extreme = new_point - lowest_point
            distance_to_outlier = lower_outlier - new_point
            recorded_data_beyond_fence = lower_outlier > lowest_point
        elif new_point > upper_outlier:
            risk_level = RiskLevel.high
            # A negative distance means the new point is greater than the recorded highest point.
            distance_to_recorded_extreme = highest_point - new_point
            distance_to_outlier = new_point - upper_outlier
            recorded_data_beyond_fence = upper_outlier < highest_point
        else:
            # Inside the fences: not an outlier.
            continue

        # The point sits inside the previously recorded range and nearer to the fence than to the extreme.
        if recorded_data_beyond_fence \
                and distance_to_recorded_extreme > 0 \
                and distance_to_outlier < distance_to_recorded_extreme:
            risk_level -= 1

        # Values seen often before are less suspicious. The threshold is read from the RiskLevel class itself
        # rather than through 'risk_level', which may have been rebound by the subtraction above.
        if bin_count is not None \
                and bin_count > self.__min_count_non_anomalous \
                and risk_level > RiskLevel.low:
            risk_level -= 1

        return True, risk_level

    return False, RiskLevel.none
def __detect_anomalies_in_non_numeric_attribute_with_blacklisting(normalized_attribute_data: List[str],
                                                                  last_retrieved_attribute_data: List[str],
                                                                  blacklisted_values: Set[str]) \
        -> Tuple[bool, RiskLevel, Set[str]]:
    """
    Detects anomalies by using the blacklisting approach.
    A blacklisted value that already appears in the normalized data is tolerated: accessing it again is not
    reported. Only blacklisted values that show up for the first time in the latest data are anomalous.

    :raises TypeError if normalized_attribute_data or last_retrieved_attribute_data are not of type 'List[str]',
            or if blacklisted_values is not of type 'Set[str]'
    :param normalized_attribute_data: The normalized attribute data.
    :type normalized_attribute_data: List[str]
    :param last_retrieved_attribute_data: The latest retrieved data.
    :type last_retrieved_attribute_data: List[str]
    :param blacklisted_values: The blacklisted values.
    :type blacklisted_values: Set[str]
    :return: The results of the anomaly detection through blacklisting, in the following format:
            (anomaly_found, risk_level, anomalous_values)
            anomaly_found: Flag for if an anomaly has been found.
            risk_level: RiskLevel.high when an anomaly has been found, RiskLevel.none otherwise.
            anomalous_values: The blacklisted values that were accessed for the first time.
    :rtype: Tuple[bool, RiskLevel, Set[str]]
    """
    # Input Validation, checked in this order: normalized data, blacklist, latest data.
    expected_arguments = (
        ("normalized_attribute_data", "List[str]", list, normalized_attribute_data),
        ("blacklisted_values", "Set[str]", set, blacklisted_values),
        ("last_retrieved_attribute_data", "List[str]", list, last_retrieved_attribute_data),
    )
    for argument_name, type_label, expected_type, argument_value in expected_arguments:
        if not isinstance(argument_value, expected_type):
            raise TypeError(
                expected_type_but_received_message.format(argument_name, type_label, argument_value))

    # Blacklisted values that were never part of the normalized data.
    baseline_values = set(normalized_attribute_data)
    never_accessed_blacklisted = blacklisted_values.difference(baseline_values)

    # Of those, the ones that appear in the latest retrieval are the anomalies.
    newly_accessed_blacklisted = never_accessed_blacklisted.intersection(last_retrieved_attribute_data)

    if newly_accessed_blacklisted:
        return True, RiskLevel.high, newly_accessed_blacklisted
    return False, RiskLevel.none, newly_accessed_blacklisted
def set_value_from_dict(self, app_profile_dict: dict) -> None:
    """
    Set the value from dict. Any old values will be lost.
    The 'app_name' key must be present, but its value is ignored.
    The update is all-or-nothing: every value is validated and every timestamp parsed before any attribute
    of this object is replaced, so a failed call leaves the profile untouched.

    :raises TypeError if app_profile_dict is not of type 'dict', or if the following values don't match
            their required type:
                - memory_infos -> List[int]
                - cpu_percents -> List[float]
                - children_counts -> List[int]
                - usernames -> List[str]
                - opened_files -> List[List[str]] (the inner collections may also be sets)
                - threads_numbers -> List[int]
                - connections_numbers -> List[int]
    :raises ValueError if the keys of app_profile_dict are not exactly the names of 'AppProfileAttribute',
            or if one of the timestamps does not match 'wades_config.datetime_format'.
            According to the documented format the expected keys are:
                - app_name
                - date_created_timestamp
                - usernames
                - memory_infos
                - opened_files
                - cpu_percents
                - children_counts
                - data_retrieval_timestamps
                - threads_numbers
                - connections_numbers
    :param app_profile_dict: the new values of the application profile.
        Format:
            {
                app_name: "Some name",
                date_created_timestamp: "2020-12-12 14:30:32:34.232",
                usernames: [user_1, user_2, ...]
                memory_infos: [2342, 23215, 31573, ...],
                opened_files: [[path_1, path_2, ...], [path_45, ...], ...],
                cpu_percents: [0.2, 13.9, ...],
                children_counts: [1, 5, 0, 4, ...],
                data_retrieval_timestamps: [timestamp_1, timestamp_2, ...],
                threads_numbers:[0, 1, 3, 9, ...],
                connections_numbers:[0, 1, 3, 9, ...]
            }
        All timestamp should have 'YYYY-MM-DD HH:MM:SS:microseconds' format or setting the new values will fail.
    :type app_profile_dict: dict
    """
    if not isinstance(app_profile_dict, dict):
        raise TypeError(expected_type_but_received_message.format("app_profile_dict", "dict", app_profile_dict))

    app_profile_dict_keys = set(app_profile_dict.keys())
    app_profile_attrs = {attribute.name for attribute in AppProfileAttribute}

    if app_profile_attrs != app_profile_dict_keys:
        raise ValueError(expected_value_but_received_message.format("app_profile_dict_keys", app_profile_attrs,
                                                                    app_profile_dict_keys))

    # Parse into a local first; the object is only modified once everything has been validated.
    object_creation_timestamp = datetime.datetime.strptime(
        app_profile_dict[AppProfileAttribute.date_created_timestamp.name],
        wades_config.datetime_format)

    memory_usages = app_profile_dict[AppProfileAttribute.memory_infos.name]
    cpu_percents = app_profile_dict[AppProfileAttribute.cpu_percents.name]
    child_process_counts = app_profile_dict[AppProfileAttribute.children_counts.name]
    users = app_profile_dict[AppProfileAttribute.usernames.name]
    opened_files = app_profile_dict[AppProfileAttribute.opened_files.name]
    threads_numbers = app_profile_dict[AppProfileAttribute.threads_numbers.name]
    connections_numbers = app_profile_dict[AppProfileAttribute.connections_numbers.name]

    values_have_expected_types = (
        all(isinstance(rss_mem, int) for rss_mem in memory_usages)
        and all(isinstance(cpu_percent, float) for cpu_percent in cpu_percents)
        and all(isinstance(children_count, int) for children_count in child_process_counts)
        and all(isinstance(threads_number, int) for threads_number in threads_numbers)
        and all(isinstance(connections_number, int) for connections_number in connections_numbers)
        and all(isinstance(user, str) for user in users)
        and isinstance(opened_files, list)
        # Check each container before looking inside it, so that empty or non-iterable entries of the
        # wrong type are rejected as well.
        and all(isinstance(files, (list, set)) and all(isinstance(file, str) for file in files)
                for files in opened_files)
    )
    if not values_have_expected_types:
        raise TypeError(expected_type_but_received_message.format("app_profile_dict_values",
                                                                  "Union[dict, str, int, 'float']",
                                                                  app_profile_dict))

    str_data_retrieval_timestamps = app_profile_dict[AppProfileAttribute.data_retrieval_timestamps.name]
    data_retrieval_timestamps = [datetime.datetime.strptime(retrieval_timestamp, wades_config.datetime_format)
                                 for retrieval_timestamp in str_data_retrieval_timestamps]

    # Everything validated and parsed: commit the new state in one go.
    # NOTE: the lists are stored by reference, they are not copied from app_profile_dict.
    self.__object_creation_timestamp = object_creation_timestamp
    self.__memory_usages = memory_usages
    self.__cpu_percent_usages = cpu_percents
    self.__child_processes_count = child_process_counts
    self.__users = users
    self.__open_files = opened_files
    self.__threads_numbers = threads_numbers
    self.__connections_numbers = connections_numbers
    self.__data_retrieval_timestamp = data_retrieval_timestamps
def add_new_information(self, memory_usage: int, child_processes_count: int, users: list, open_files: list,
                        cpu_percentage: float, data_retrieval_timestamp: datetime.datetime,
                        threads_number: int, connections_num: int) -> None:
    """
    Adds new information about this application.
    Adds for a specific process associated to the application.

    :raises TypeError if one of the following criteria is met:
            - memory_usage, child_processes_count, threads_number, or connections_num are not of type 'int'
            - users is not of type 'list'
            - open_files is not of type 'list'
            - cpu_percentage is not of type 'float'
            - data_retrieval_timestamp is not of type 'datetime.datetime'
    :raises ValueError if either memory_usage, child_processes_count, cpu_percentage, threads_number or
            connections_num has negative value, or if data_retrieval_timestamp is newer than current time.
    :param memory_usage: The memory usage of this application.
    :type memory_usage: int
    :param child_processes_count: The number of child process registered at the moment.
    :type child_processes_count: int
    :param users: The users that are running this application.
    :type users: list
    :param open_files: The open files to add.
    :type open_files: list
    :param cpu_percentage: Current CPU usage for this application.
    :type cpu_percentage: float
    :param data_retrieval_timestamp: The time the data was retrieved.
    :type data_retrieval_timestamp: datetime.datetime
    :param threads_number: The number of threads associated to this process.
    :type threads_number: int
    :param connections_num: The number of connections the process has.
    :type connections_num: int
    """
    if not isinstance(memory_usage, int):
        raise TypeError(expected_type_but_received_message.format("memory_usage", "int", memory_usage))
    if not isinstance(child_processes_count, int):
        raise TypeError(
            expected_type_but_received_message.format("child_processes_count", "int", child_processes_count))
    if not isinstance(users, list):
        raise TypeError(expected_type_but_received_message.format("users", "list", users))
    if not isinstance(open_files, list):
        raise TypeError(expected_type_but_received_message.format("open_files", "list", open_files))
    if not isinstance(cpu_percentage, float):
        raise TypeError(expected_type_but_received_message.format("cpu_percentage", "float", cpu_percentage))
    if not isinstance(data_retrieval_timestamp, datetime.datetime):
        raise TypeError(expected_type_but_received_message.format("data_retrieval_timestamp",
                                                                  "datetime.datetime",
                                                                  data_retrieval_timestamp))
    if not isinstance(threads_number, int):
        raise TypeError(expected_type_but_received_message.format("threads_number", "int", threads_number))
    if not isinstance(connections_num, int):
        raise TypeError(expected_type_but_received_message.format("connections_num", "int", connections_num))

    non_negative_arguments = [memory_usage, child_processes_count, cpu_percentage, threads_number,
                              connections_num]
    if any(attribute < 0 for attribute in non_negative_arguments):
        # '{}' placeholders are required here: str.format does not substitute '%s'.
        raise ValueError(
            "Arguments memory_usage, child_processes_count, cpu_percentage, threads_number, and connections_num "
            "cannot have negative value, but received [{}]".format(
                ", ".join(str(attr) for attr in non_negative_arguments)))

    # Compare like with like: a naive timestamp is checked against naive local time (tzinfo is None), while a
    # timezone-aware timestamp is checked against the current time in its own timezone.
    current_time = datetime.datetime.now(tz=data_retrieval_timestamp.tzinfo)
    if data_retrieval_timestamp > current_time:
        raise ValueError("Argument data_retrieval_timestamp cannot be newer than current time. "
                         "Value received: {}".format(data_retrieval_timestamp))

    # The open files are registered through the dedicated method before the remaining values are appended.
    self.add_open_files(open_files=open_files, data_retrieval_timestamp=data_retrieval_timestamp)
    self.__memory_usages.append(memory_usage)
    self.__child_processes_count.append(child_processes_count)
    self.__users.extend(users)
    self.__cpu_percent_usages.append(cpu_percentage)
    self.__data_retrieval_timestamp.append(data_retrieval_timestamp)
    self.__threads_numbers.append(threads_number)
    self.__connections_numbers.append(connections_num)