def handle_prediction_state_by_project(project_id, body):  # noqa: E501
    """Start or abort the link prediction run of a project.

    The requested state is parsed from the JSON payload of the current
    connexion request. ``"Start"`` builds a ``PredictionWorker`` from the
    stored predictors and evaluation setup and launches it in a daemon
    process; ``"Abort"`` terminates the process held in the module-level
    ``prediction_process`` and records a ``Failed`` status.

    :param project_id: ID of the current project.
    :type project_id:
    :param body: Request body passed in by the framework. It is not read
        here; the state comes from ``connexion.request``.
    :type body: dict | bytes

    :returns: ``'Prediction state altered!'``, or ``('Exception', error)``
        when anything inside the guarded section raises.
    """
    # NOTE: only one prediction at a time is supported. Parallel requests
    # would need more advanced handling (task queues, etc.).
    global prediction_process

    # NOTE(review): for a non-JSON request `prediction_state` stays unbound;
    # the resulting error is caught below and returned as an exception tuple.
    if connexion.request.is_json:
        prediction_state = PredictionState.from_dict(
            connexion.request.get_json())  # noqa: E501

    try:
        db = DatabaseConnector.get_db_instance()
        project_record = db.get_project_by_id('project_id', project_id)
        network_id = project_record['predicted_network_id']

        if prediction_state.state == "Start":
            print("BEGIN PREDICTION PRCOESS")
            selected_features = db.get_selected_network_features_by_id(
                'predicted_network_id', network_id)
            stored_evaluation = db.get_evaluation_result_by_id(
                'project_id', project_id)
            setup = stored_evaluation['result_data']
            # Worker settings, in the positional order PredictionWorker
            # receives them.
            setting_keys = ('with_validation', 'ml_preprocessing',
                            'train_sampling_ratio', 'test_sampling_ratio',
                            'random_seed')
            worker = PredictionWorker(0, project_id, selected_features,
                                      *(setup[key] for key in setting_keys))
            prediction_process = multiprocessing.Process(
                target=worker.predict, daemon=True)
            # Record the initial status before the process starts working.
            DatabaseConnector.get_db_instance().add_linkprediction_status(
                0, 0, 0, 'Prediction', 'Waiting', 'project_id', project_id)
            prediction_process.start()
        elif prediction_state.state == "Abort":
            print("ABORT PREDICTION PRCOESS")
            # NOTE(review): presumably fails with a caught error if no
            # process was started before - confirm the intended behavior.
            prediction_process.terminate()
            db.add_linkprediction_status(0, 0, 0, "Prediction", "Failed",
                                         'project_id', project_id)
    except Exception as error:
        return ('Exception', error)

    return 'Prediction state altered!'
def get_prediction_status_by_project(project_id):  # noqa: E501
    """Return the most recent prediction status of a project.

    Resolves the project's predicted network and wraps its last stored link
    prediction status record in a ``PredictionStatus``.

    :param project_id: ID of the current project.
    :type project_id:

    :rtype: PredictionStatus, or ``('Exception', error)`` on failure
    """
    # Record fields in the positional order PredictionStatus receives them.
    field_order = ('status_id', 'log_timestamp', 'current_step', 'max_steps',
                   'process_step', 'status_value')
    try:
        db = DatabaseConnector.get_db_instance()
        network_row = db.get_predicted_network_by_id('project_id', project_id)
        latest = db.get_last_linkprediction_status_by_id(
            'predicted_network_id', network_row['predicted_network_id'])
        return PredictionStatus(*(latest[name] for name in field_order))
    except Exception as error:
        return ('Exception', error)
def get_original_network_by_project(project_id):  # noqa: E501
    """Return the original network of a project with node and edge counts.

    :param project_id: ID of the current project.
    :type project_id:

    :rtype: OriginalNetwork, or ``('Exception', error)`` on failure
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        network = connector.get_original_network_by_id('project_id',
                                                       project_id)
        network_id = network['original_network_id']
        # Nodes and edges are loaded only to count them.
        node_rows = connector.get_nodes_by_id('original_network_id',
                                              network_id)
        edge_rows = connector.get_edges_of_original_network_by_id(
            'original_network_id', network_id)
        return OriginalNetwork(id=network_id,
                               designation=network['designation'],
                               directed=network['directed'],
                               multigraph=network['multigraph'],
                               node_count=len(node_rows),
                               edge_count=len(edge_rows))
    except Exception as error:
        return ('Exception', error)
def occupation_similarity(u_attr: str, v_attr: str):
    """Score how similar two occupation attributes are.

    Both attributes are parsed with ``_get_occupation_format``, looked up in
    the database and reduced to one record each by
    ``_choose_correct_hierarchy``.

    :param u_attr: Occupation attribute of the first node.
    :param v_attr: Occupation attribute of the second node.

    :returns: ``0`` if either record has ``job_id`` 0, ``1`` if both records
        share a ``job_id``, otherwise 0.3 for an equal field of activity plus
        0.3 for an equal subject area plus 0.4 times the competence
        similarity.
    :raises ValueError: If either attribute cannot be parsed.
    """
    connector = DatabaseConnector.get_db_instance()
    u_format = _get_occupation_format(u_attr)
    v_format = _get_occupation_format(v_attr)
    if u_format is None or v_format is None:
        raise ValueError('Occupation formatting is wrong!')

    def _candidates(parsed_format):
        # Two-part formats are resolved by column and may match several
        # records; all other formats address a single hierarchy entry.
        if len(parsed_format) == 2:
            return connector.get_occupations_by_column(*parsed_format)
        return [connector.get_occupation_by_hierarchy(*parsed_format)]

    u_candidates = _candidates(u_format)
    v_candidates = _candidates(v_format)
    u_record, v_record = _choose_correct_hierarchy(u_candidates, v_candidates)

    if u_record['job_id'] == 0 or v_record['job_id'] == 0:
        return 0
    if u_record['job_id'] == v_record['job_id']:
        return 1

    score = 0
    for shared_key in ('field_of_activity', 'subject_area'):
        if u_record[shared_key] == v_record[shared_key]:
            score += 0.3
    score += 0.4 * _get_competence_similarity(
        u_competences=u_record['competences'],
        v_competences=v_record['competences'])
    return score
def _save_predicted_graph(self, predicted_graph_train, predicted_graph_test):
    """Persist the predicted graph of this worker's project.

    Stores the test graph when validation is enabled and the train graph
    otherwise, notifying the monitor before and after the write.

    :param predicted_graph_train: Predicted graph built from the train data.
    :param predicted_graph_test: Predicted graph built from the test data.
    """
    self.monitor.notify('Processing')
    network_record = DatabaseConnector.get_db_instance(
    ).get_predicted_network_by_id('project_id', self.project_id)
    graph_to_store = (predicted_graph_test
                      if self.validation else predicted_graph_train)
    save_predicted_graph_to_db(graph_to_store,
                               network_record['predicted_network_id'])
    self.monitor.notify('Finished')
def get_predictors_by_project(project_id):  # noqa: E501
    """Return the available and the selected predictors of a project.

    Standard features of type 'Social Theory with exogenous Attributes' get
    an ``attribute_weightings`` parameter listing every node attribute of the
    project that has a non-``None`` entry in ``ATTR_HANDLING``, each with
    weight 1. The returned evaluation setup uses fixed values (seed 42, no
    validation, train sampling ratio 0.7).

    :param project_id: ID of the current project.
    :type project_id:

    :rtype: Predictors, or ``('Exception', error)`` on failure
    """
    try:
        db = DatabaseConnector.get_db_instance()
        network_row = db.get_predicted_network_by_id('project_id', project_id)
        network_id = network_row['predicted_network_id']

        standard_rows = db.get_standard_network_features()
        attribute_names = db.get_distinct_node_attributes_by_id(
            'project_id', project_id)
        # The same list object is shared by every feature that receives it.
        weightings = [
            {'attribute': name, 'value': 1}
            for name in attribute_names
            if name in ATTR_HANDLING and ATTR_HANDLING[name] is not None
        ]

        available = []
        for row in standard_rows:
            if row['feature_type'] == 'Social Theory with exogenous Attributes':
                row['parameters']['attribute_weightings'] = weightings
            available.append(
                Predictor(id=row['standard_feature_id'],
                          designation=row['designation'],
                          category=row['feature_type'],
                          parameters=row['parameters']))

        selected_rows = db.get_selected_network_features_by_id(
            id_type='predicted_network_id', reference_id=network_id)
        selected = [
            Predictor(id=row['selected_feature_id'],
                      designation=row['designation'],
                      category=row['feature_type'],
                      parameters=row['parameters'])
            for row in selected_rows
        ]

        return Predictors(available_predictors=available,
                          selected_predictors=selected,
                          evaluation_setup=EvaluationSetup(
                              random_seed=42,
                              with_validation=False,
                              train_sampling_ratio=0.7))
    except Exception as error:
        return ('Exception', error)
def delete_project_by_id(project_id):  # noqa: E501
    """Delete the project with the given ID.

    :param project_id: ID of the project to delete.
    :type project_id:

    :returns: A confirmation message, or ``('Exception', error)`` on failure.
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        connector.delete_project(project_id)
        confirmation = f'Deleted project with ID {project_id}'
    except Exception as error:
        return ('Exception', error)
    return confirmation
def delete_predicted_network_by_project(project_id):  # noqa: E501
    """Delete the predicted network that belongs to a project.

    :param project_id: ID of the current project.
    :type project_id:

    :returns: A confirmation message, or ``('Exception', error)`` on failure.
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        connector.delete_predicted_network_by_id('project_id', project_id)
        confirmation = f'Deleted predicted network with ProjectID {project_id}'
    except Exception as error:
        return ('Exception', error)
    return confirmation
def create_prediction_setup(project_id, body):  # noqa: E501
    """Store the selected predictors and the evaluation setup of a project.

    Each entry of ``body['selected_predictors']`` is added to the project's
    predicted network. ``body['evaluation_setup']`` is then saved as JSON;
    if the previously stored result data contains train results, those
    results, the test results and the timestamp are carried over into it.

    :param project_id: ID of the current project.
    :type project_id:
    :param body: Prediction setup with the keys ``selected_predictors`` and
        ``evaluation_setup``.
    :type body: dict | bytes

    :returns: ``'Created prediction!'``, or ``('Exception', error)`` on
        failure.
    """
    try:
        db = DatabaseConnector.get_db_instance()
        network_row = db.get_predicted_network_by_id('project_id', project_id)
        network_id = network_row['predicted_network_id']

        for predictor in body['selected_predictors']:
            db.add_selected_network_feature_to_project(
                designation=predictor["designation"],
                feature_type=predictor["category"],
                parameters=predictor["parameters"],
                predicted_network_id=network_id)

        previous = db.get_evaluation_result_by_id('project_id', project_id)
        new_setup = body['evaluation_setup']
        previous_data = previous['result_data']
        if 'train_results' in previous_data:
            # Keep the outcome of the last run alongside the new setup.
            for carried_key in ('train_results', 'test_results', 'timestamp'):
                new_setup[carried_key] = previous_data[carried_key]

        db.add_or_update_evaluation_result(project_id, json.dumps(new_setup))
        return 'Created prediction!'
    except Exception as error:
        return ('Exception', error)
def get_project_by_id(project_id):  # noqa: E501
    """Return the project with the given ID.

    The returned ``Project`` carries the IDs of both the original and the
    predicted network.

    :param project_id: ID of the project to return.
    :type project_id:

    :rtype: Project, or ``('Exception', error)`` on failure
    """
    try:
        db = DatabaseConnector.get_db_instance()
        project_data = db.get_project_by_id('project_id', project_id)
        return Project(
            id=project_data['project_id'],
            designation=project_data['designation'],
            description=project_data['description'],
            original_network_id=project_data['original_network_id'],
            # Previously left out; the project record exposes this key and
            # the project listing already returns it.
            predicted_network_id=project_data['predicted_network_id'])
    except Exception as error:
        return ('Exception', error)
def save_predicted_graph_to_db(predicted_graph: DiGraph, predicted_network_id: str):
    """Replace the stored edges of a predicted network with those of a graph.

    All existing edges of the predicted network are deleted first. Every
    graph edge that carries an ``edges`` attribute is then written together
    with its edge components and, where present, the methods applied to each
    component. Graph edges without an ``edges`` attribute are skipped. The
    graph itself is not modified.

    :param predicted_graph: Graph whose edge data provides ``identifiers``
        (a mapping from network node IDs to database node IDs),
        ``edge_color`` and ``edges`` (the list of edge components).
    :param predicted_network_id: ID of the predicted network to overwrite.
    """
    # NOTE(review): other code obtains the connector through
    # DatabaseConnector.get_db_instance() - confirm that direct construction
    # is intended here.
    db = DatabaseConnector()
    db.delete_edges_of_predicted_network_by_id('predicted_network_id', predicted_network_id)

    for graph_edge in predicted_graph.edges:
        source, target = graph_edge[0], graph_edge[1]
        edge_data = predicted_graph[source][target]
        if 'edges' not in edge_data:
            continue

        identifiers = edge_data['identifiers']
        edge_id = db.add_edge_to_predicted_network(
            (identifiers[source], identifiers[target]),
            edge_data['edge_color'],
            predicted_network_id
        )

        for edge_comp in edge_data['edges']:
            sub_edge = (
                identifiers[edge_comp['source']],
                identifiers[edge_comp['target']]
            )
            # A missing score is stored as None without writing the key back
            # into the caller's graph data.
            edge_comp_id = db.add_component_to_predicted_edge(
                sub_edge,
                edge_comp['predicted'],
                edge_comp.get('prediction_score'),
                edge_id
            )
            if 'applied_methods' not in edge_comp:
                continue
            db.add_applied_methods_to_predicted_edge_component(
                list(edge_comp['applied_methods'].items()),
                edge_comp_id
            )
def get_evaluation_results_by_project(project_id):  # noqa: E501
    """Return the stored evaluation results of a project.

    :param project_id: ID of the current project.
    :type project_id:

    :rtype: EvaluationResults, or ``('Exception', error)`` on failure. The
        timestamp is ``None`` when the result data has none.
    """
    try:
        stored = DatabaseConnector.get_db_instance(
        ).get_evaluation_result_by_id('project_id', project_id)
        result_data = stored['result_data']
        timestamp = (result_data['timestamp']
                     if 'timestamp' in result_data else None)
        return EvaluationResults(timestamp=timestamp, results=result_data)
    except Exception as error:
        return ('Exception', error)
def get_predicted_network_by_project(project_id):  # noqa: E501
    """Return the predicted network of a project.

    The network is loaded with ``load_predicted_graph_from_db`` and its
    ``nodes``, ``links`` and ``information`` entries are passed on.

    :param project_id: ID of the current project.
    :type project_id:

    :rtype: PredictedNetwork, or ``('Exception', error)`` on failure
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        network_row = connector.get_predicted_network_by_id(
            'project_id', project_id)
        graph = load_predicted_graph_from_db(
            network_row['predicted_network_id'])
        payload = {
            key: graph[key] for key in ('nodes', 'links', 'information')
        }
        return PredictedNetwork(**payload)
    except Exception as error:
        return ('Exception', error)
def get_projects():  # noqa: E501
    """Return all available projects.

    :rtype: List[Project], or ``('Exception', error)`` on failure
    """
    try:
        rows = DatabaseConnector.get_db_instance().get_projects()
        return [
            Project(id=row['project_id'],
                    designation=row['designation'],
                    description=row['description'],
                    original_network_id=row['original_network_id'],
                    predicted_network_id=row['predicted_network_id'])
            for row in rows
        ]
    except Exception as error:
        return ('Exception', error)
def delete_prediction_setup(project_id):  # noqa: E501
    """Delete the selected prediction features of a project.

    :param project_id: ID of the current project.
    :type project_id:

    :returns: ``'Deleted prediction features!'``, or ``('Exception', error)``
        on failure.
    """
    try:
        db_instance = DatabaseConnector.get_db_instance()
        predicted_network = db_instance.get_predicted_network_by_id(
            'project_id', project_id)
        predicted_network_id = predicted_network['predicted_network_id']
        prediction_features = db_instance.get_selected_network_features_by_id(
            'predicted_network_id', predicted_network_id)
        # The delete is keyed by the network ID alone, so it is issued once
        # when features exist instead of once per feature.
        if prediction_features:
            db_instance.delete_selected_network_features_by_id(
                'predicted_network_id', predicted_network_id)
        return 'Deleted prediction features!'
    except Exception as error:
        return ('Exception', error)
def _update(self, data):
    """Write one link prediction status entry to the database.

    :param data: Mapping with the keys ``process_id``, ``step_index``,
        ``max_steps``, ``name``, ``status``, ``id_type`` and
        ``reference_id``.
    """
    # Keys in the positional order add_linkprediction_status receives them.
    argument_keys = ('process_id', 'step_index', 'max_steps', 'name',
                     'status', 'id_type', 'reference_id')
    DatabaseConnector.get_db_instance().add_linkprediction_status(
        *(data[key] for key in argument_keys))
def load_predicted_graph_from_db(predicted_network_id: str):
    """Load a predicted network and return it as a plain dictionary.

    :param predicted_network_id: ID of the predicted network to load.

    :returns: A dict with the keys ``nodes`` (one ``{'id': ...}`` entry per
        node), ``links`` (one entry per stored edge, each with its edge
        components under ``edges``) and ``information`` (node count, number
        of edge components, number of edges, and how often each method
        component was applied, grouped by method designation).
    """
    connector = DatabaseConnector()
    node_rows = connector.get_nodes_by_id('predicted_network_id', predicted_network_id)
    # Edge rows refer to database node IDs; the output uses network node IDs.
    network_id_of = {row['node_id']: row['node_network_id'] for row in node_rows}
    node_payload = [{'id': row['node_network_id']} for row in node_rows]

    edge_rows = connector.get_edges_of_predicted_network_by_id('predicted_network_id', predicted_network_id)
    method_usage = {}
    link_payload = []
    component_total = 0

    for edge_row in edge_rows:
        components = connector.get_components_of_predicted_edge_by_id(
            'predicted_edge_id', edge_row['predicted_edge_id']
        )
        component_total += len(components)
        link = {
            'source': network_id_of[edge_row['source_node']],
            'target': network_id_of[edge_row['target_node']],
            'edge_color': edge_row['edge_color'],
            'edges': []
        }
        for component in components:
            entry = {
                'source': network_id_of[component['source']],
                'target': network_id_of[component['target']],
                'predicted': component['predicted']
            }
            if entry['predicted'] is True:
                entry['prediction_score'] = component['prediction_score']
                method_rows = connector.get_applied_methods_of_predicted_edge_component_by_id(
                    'edge_component_id', component['edge_component_id']
                )
                applied = entry['applied_methods'] = {}
                for method_row in method_rows:
                    parts = method_row['method_components']
                    designation = method_row['method_designation']
                    applied[designation] = parts
                    counters = method_usage.setdefault(designation, {})
                    for part in parts:
                        counters[part] = counters.get(part, 0) + 1
            else:
                # Components that were not predicted get a fixed score of 1.
                entry['prediction_score'] = 1
            link['edges'].append(entry)
        link_payload.append(link)

    return {
        'nodes': node_payload,
        'links': link_payload,
        'information': {
            'node_count': len(node_rows),
            'directed_edge_count': component_total,
            'undirected_edge_count': len(link_payload),
            'methods_applied': method_usage
        }
    }
def create_project(**kwargs):  # noqa: E501
    """Create a project from an uploaded network file.

    The file is parsed before the database is touched. Afterwards the
    project, its original and predicted network, the nodes, edges and node
    attributes are stored, the original graph is saved as the initial
    predicted graph, and a default evaluation setup is written.

    Reads ``body``, ``network_file`` and ``additional_network_file`` from the
    keyword arguments, and ``file_format``, ``designation``, ``description``,
    ``network_designation``, ``network_directed`` and ``network_multigraph``
    from ``body``.

    :param body: Form fields of the request.
    :type body: dict
    :param network_file: Binary object which contains the network file with
        a standard network format.
    :type network_file: str
    :param additional_network_file: Binary object which contains an
        additional network file (especially used for CSV imports).
    :type additional_network_file: str

    :rtype: Project
    :raises InternalServerError: If the file cannot be parsed or the data
        cannot be written to the database.
    """
    body = kwargs.get('body')
    file = kwargs.get('network_file')
    additional_file = kwargs.get('additional_network_file')

    # Try to process the file before accessing the database.
    try:
        network_file = NetworkFile(body.get('file_format'), file,
                                   additional_file)
        node_list = network_file.parse_nodes()
    except Exception:
        logging.exception("Exception while handling the input file")
        raise http_exceptions.InternalServerError(
            description=
            'Something went wrong! Please check if your network file is correct.'
        )

    try:
        db = DatabaseConnector.get_db_instance()
        project_id = db.add_project(designation=body.get('designation'),
                                    description=body.get('description'))
        original_network_id = db.add_original_network_to_project(
            designation=body.get('network_designation'),
            directed=body.get('network_directed'),
            multigraph=body.get('network_multigraph'),
            project_id=project_id)
        predicted_network_id = db.add_predicted_network_to_project(
            designation=body.get('network_designation'),
            project_id=project_id)

        stored_nodes = db.add_nodes(node_list, original_network_id,
                                    predicted_network_id)
        db.add_edges_to_original_network(
            network_file.parse_edges(stored_nodes), original_network_id)

        for stored_node in stored_nodes:
            # Attributes are parsed with the first element of each entry and
            # stored under the second.
            attribute_list = network_file.parse_attributes(stored_node[0])
            if attribute_list:
                db.add_node_attributes(attribute_list, stored_node[1])

        # The predicted network starts out as a copy of the original graph.
        original_graph = build_original_graph('project_id', project_id)
        save_predicted_graph_to_db(original_graph.copy(), predicted_network_id)

        # NOTE(review): handed over as a plain dict - confirm whether
        # add_or_update_evaluation_result expects a dict or a JSON string.
        db.add_or_update_evaluation_result(
            project_id, {
                "random_seed": 42,
                "with_validation": False,
                "train_sampling_ratio": 0.8,
                "test_sampling_ratio": 0.9,
                "ml_preprocessing": False
            })

        return Project(id=project_id,
                       designation=body.get('designation'),
                       description=body.get('description'),
                       original_network_id=original_network_id,
                       predicted_network_id=predicted_network_id)
    except Exception:
        logging.exception(
            "Exception occured while inserting data in the database")
        raise http_exceptions.InternalServerError(
            description=
            'Something went wrong! The input file seems to be wrong and the data could not be loaded into the database.'
        )
def predict(self):
    """Run the whole link prediction pipeline for this worker's project.

    Stages, in order: graph preparation and sampling, topology features,
    social theory features, classification, assembly and storage of the
    predicted graph, and evaluation. The evaluation results are written to
    the project's evaluation record together with a timestamp.

    Any exception is printed and reported through ``self.monitor.failed()``
    instead of being raised.
    """
    try:
        self.monitor.pending()

        # PIPELINE - PREPARATION
        truth_h = build_original_graph('project_id', self.project_id, 'hierarchical')
        truth_f = hierarchical_to_flat(truth_h)
        train_f, test_f = self._sample_graphs(truth_f)
        train_missing, test_missing = self._get_missing_edges(truth_f, train_f, test_f)
        train_set, test_set = self._prepare_labels(truth_f, test_f, train_missing, test_missing)

        train_h = flat_to_hierarchical(train_f)
        predicted_train = train_h.copy()
        if self.validation:
            test_h = flat_to_hierarchical(test_f)
            predicted_test = test_h.copy()
        else:
            # Without validation there is no test-side graph.
            test_h = None
            predicted_test = None

        # PIPELINE - TOPOLOGY
        train_t_df, test_t_df = self._topology_pipeline(
            truth_f, train_f, test_f, train_set, test_set)

        # PIPELINE - SOCIALTHEORY
        train_st_df, test_st_df = self._socialtheory_pipeline(
            train_h, test_h, train_set, test_set, predicted_train, predicted_test)

        # PIPELINE - CLASSIFICATION
        train_features = pd.concat([train_t_df, train_st_df], axis=1)
        test_features = None
        if test_t_df is not None:
            test_features = pd.concat([test_t_df, test_st_df], axis=1)
        train_c_df, test_c_df = self._classification_pipeline(train_features, test_features)

        # CREATE PREDICTED GRAPH
        self._add_topology_to_predicted_graph(
            train_t_df, test_t_df, predicted_train, predicted_test)
        self._add_classification_to_predicted_graph(
            train_set.node_pairs, train_c_df, test_c_df, predicted_train, predicted_test)

        # Final frames: label, node pairs, then classification columns.
        train_final = pd.concat([train_set.node_pairs, train_c_df], axis=1)
        train_final = pd.concat([train_set.label, train_final], axis=1)
        test_final = None
        if test_c_df is not None:
            test_final = pd.concat([test_set.node_pairs, test_c_df], axis=1)
            test_final = pd.concat([test_set.label, test_final], axis=1)

        self._save_predicted_graph(predicted_train, predicted_test)

        # PIPELINE - EVALUATION
        results_train, results_test = self._create_results(train_final, test_final)

        db = DatabaseConnector.get_db_instance()
        stored = db.get_evaluation_result_by_id('project_id', self.project_id)
        evaluation_data = stored['result_data']
        evaluation_data['timestamp'] = datetime.datetime.now().timestamp()
        evaluation_data['train_results'] = results_train
        evaluation_data['test_results'] = results_test
        db.add_or_update_evaluation_result(self.project_id, evaluation_data)

        self.monitor.finished()
    except Exception as e:
        print(e)
        self.monitor.failed()
def build_original_graph(id_type, reference_id, return_type='hierarchical'):
    """Build the original network as a directed graph from the database.

    In the hierarchical graph an edge and its reverse edge are merged into a
    single graph edge whose ``edges`` attribute lists both directions; node
    attributes are attached to the nodes. In the flat graph every stored
    edge becomes its own graph edge. Each graph edge carries an
    ``identifiers`` dict holding the edge record plus a mapping from the two
    network node IDs to the database node IDs.

    A self-loop is never treated as its own reverse edge, and the edge list
    is not modified while it is being traversed, so self-loops and parallel
    edges cannot cause edges to be skipped or the build to fail.

    :param id_type: Possible values: 'original_network_id', 'project_id'.
    :param reference_id: Value of the ID.
    :param return_type: 'hierarchical' returns the hierarchical graph,
        'flat' returns the flat graph, 'both' returns the tuple
        ``(hierarchical_graph, flat_graph)``.

    :returns: The requested graph(s). If anything fails while reading from
        the database or building the graphs, the error is printed and
        ``('Exception', error)`` is returned instead.
    :raises ValueError: If ``id_type`` or ``return_type`` is not valid.
    """
    if id_type not in ['original_network_id', 'project_id']:
        raise ValueError('Parameter id_type is not valid!')
    if return_type not in ['hierarchical', 'flat', 'both']:
        raise ValueError('Parameter return_type is not valid!')

    build_hierarchical = return_type in ('hierarchical', 'both')
    build_flat = return_type in ('flat', 'both')

    try:
        db = DatabaseConnector.get_db_instance()
        nodes = db.get_nodes_by_id(id_type, reference_id)
        # Database node ID -> network node ID; replaces a linear scan over
        # the nodes for every edge.
        uuid_to_id = {
            node['node_id']: node['node_network_id']
            for node in nodes
        }
        edges = db.get_edges_of_original_network_by_id(id_type, reference_id)

        original_graph_flat = DiGraph()
        original_graph_hierarchical = DiGraph()

        for node in nodes:
            if build_hierarchical:
                node_attributes = db.get_node_attributes_by_id(
                    'node_id', node['node_id'])
                kv_attributes = {
                    record['attribute_name']: record['attribute_value']
                    for record in node_attributes
                }
                original_graph_hierarchical.add_node(
                    node_for_adding=node['node_network_id'],
                    identifiers={**node},
                    **kv_attributes)
            if build_flat:
                original_graph_flat.add_node(
                    node_for_adding=node['node_network_id'])

        if build_hierarchical:
            # First edge record per (source, target) pair, for constant-time
            # reverse-edge lookup.
            first_edge_by_endpoints = {}
            for edge in edges:
                first_edge_by_endpoints.setdefault(
                    (edge['source_node'], edge['target_node']), edge)

            # Records already merged into an earlier edge as its reverse
            # direction. Tracked by identity so the list being iterated is
            # never mutated.
            merged_as_reverse = set()
            for edge in edges:
                if id(edge) in merged_as_reverse:
                    continue
                source_id = uuid_to_id[edge['source_node']]
                target_id = uuid_to_id[edge['target_node']]
                sub_edges = [{
                    'source': source_id,
                    'target': target_id,
                    'predicted': False
                }]

                rev_edge = None
                if edge['source_node'] != edge['target_node']:
                    rev_edge = first_edge_by_endpoints.get(
                        (edge['target_node'], edge['source_node']))
                if rev_edge is not None:
                    sub_edges.append({
                        'source': target_id,
                        'target': source_id,
                        'predicted': False
                    })
                    merged_as_reverse.add(id(rev_edge))

                original_graph_hierarchical.add_edge(
                    u_of_edge=source_id,
                    v_of_edge=target_id,
                    identifiers={
                        **edge,
                        source_id: edge['source_node'],
                        target_id: edge['target_node']
                    },
                    edges=sub_edges,
                    edge_color=COLOR_ORIGINAL_ONLY)

        if build_flat:
            for edge in edges:
                source_id = uuid_to_id[edge['source_node']]
                target_id = uuid_to_id[edge['target_node']]
                original_graph_flat.add_edge(
                    u_of_edge=source_id,
                    v_of_edge=target_id,
                    identifiers={
                        **edge,
                        source_id: edge['source_node'],
                        target_id: edge['target_node']
                    },
                    edge_color=COLOR_ORIGINAL_ONLY,
                    predicted=False)

        if return_type == 'flat':
            return original_graph_flat
        if return_type == 'hierarchical':
            return original_graph_hierarchical
        return (original_graph_hierarchical, original_graph_flat)
    except Exception as error:
        # NOTE(review): callers receive a tuple instead of a graph on
        # failure; kept unchanged for compatibility.
        print(error)
        return ('Exception', error)