def run_test(model_path: RichPath, test_data_path: RichPath, type_lattice_path: RichPath,
             alias_metadata_path: RichPath, print_predictions: bool = False):
    test_run_id = "_".join([time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])
    test_hyper_overrides = {
        'run_id': test_run_id,
        "dropout_keep_rate": 1.0,
    }

    test_data_chunks = test_data_path.get_filtered_files_in_dir('*gz')

    # Restore model
    model = model_restore_helper.restore(
        model_path, is_train=False, hyper_overrides=test_hyper_overrides)

    evaluator = TypePredictionEvaluator(type_lattice_path, alias_metadata_path)

    all_annotations = model.annotate(test_data_chunks)
    for annotation in all_annotations:
        if ignore_type_annotation(annotation.original_annotation):
            continue
        predicted_annotation = max(annotation.predicted_annotation_logprob_dist,
                                   key=lambda x: annotation.predicted_annotation_logprob_dist[x])
        if print_predictions:
            print(
                f'{annotation.provenance} -- {annotation.name}: {annotation.original_annotation} '
                f'-> {predicted_annotation} '
                f'({math.exp(annotation.predicted_annotation_logprob_dist[predicted_annotation]) * 100:.1f}%)')
        evaluator.add_sample(ground_truth=annotation.original_annotation,
                             predicted_dist=annotation.predicted_annotation_logprob_dist)

    print(json.dumps(evaluator.metrics(), indent=2, sort_keys=True))

def _load_data_from_sample(hyperparameters: Dict[str, Any], metadata: Dict[str, Any],
                           raw_sample: Dict[str, Any], result_holder: Dict[str, Any],
                           is_train: bool = True) -> bool:
    keep_sample = super(Path2Annotation, Path2Annotation)._load_data_from_sample(
        hyperparameters, metadata, raw_sample, result_holder, is_train)
    if not keep_sample:
        return False

    target_class = []
    for node_idx, annotation_data in raw_sample['supernodes'].items():
        annotation = annotation_data['annotation']
        if is_train and ignore_type_annotation(annotation):
            continue
        target_class.append(
            TypeClassificationModel._get_idx_for_type(annotation, metadata, hyperparameters))

    result_holder['variable_target_class'] = np.array(target_class, dtype=np.uint16)
    return len(target_class) > 0

def _load_data_from_sample(hyperparameters: Dict[str, Any], metadata: Dict[str, Any],
                           raw_sample: Dict[str, Any], result_holder: Dict[str, Any],
                           is_train: bool = True) -> bool:
    keep_sample = super(Graph2HybridMetric, Graph2HybridMetric)._load_data_from_sample(
        hyperparameters, metadata, raw_sample, result_holder, is_train)
    if not keep_sample:
        return False

    target_node_idxs, target_class, target_class_id = [], [], []
    for node_idx, annotation_data in raw_sample['supernodes'].items():
        node_idx = int(node_idx)
        annotation = annotation_data['annotation']
        if is_train and ignore_type_annotation(annotation):
            continue
        target_node_idxs.append(node_idx)
        target_class.append(annotation)
        target_class_id.append(
            TypeClassificationModel._get_idx_for_type(annotation, metadata, hyperparameters))

    result_holder['target_node_idxs'] = np.array(target_node_idxs, dtype=np.uint16)
    result_holder['target_type'] = target_class
    result_holder['variable_target_class'] = np.array(target_class_id, dtype=np.uint16)
    return len(target_node_idxs) > 0

def errorenous_predictions():
    for prediction in predictions_jsonl.read_by_file_suffix():
        if ignore_type_annotation(prediction['original_annotation']):
            continue
        top_prediction = prediction["predicted_annotation_logprob_dist"][0][0]
        if top_prediction != prediction['original_annotation']:
            yield prediction

def _load_metadata_from_sample(hyperparameters: Dict[str, Any], raw_sample: Dict[str, Any],
                               raw_metadata: Dict[str, Any]) -> None:
    annotations = (annotation_data['annotation']
                   for annotation_data in raw_sample['supernodes'].values()
                   if not ignore_type_annotation(annotation_data['annotation']))
    if 'strip_type_parameters' in hyperparameters and hyperparameters['strip_type_parameters']:
        annotations = (t.split("[")[0] for t in annotations)
    raw_metadata['type_occurences_counter'].update(annotations)

def run_test(preds_path: RichPath, type_lattice_path: RichPath, alias_metadata_path: RichPath,
             result_path: str, top_n=10, print_predictions: bool = False):
    evaluator = TypePredictionEvaluator(type_lattice_path, alias_metadata_path, top_n=top_n)

    for annotation in load_jsonl_gz(preds_path):
        if ignore_type_annotation(annotation['original_annotation']):
            continue
        if print_predictions:
            predicted_annotation = max(
                annotation['predicted_annotation_logprob_dist'],
                key=lambda x: annotation['predicted_annotation_logprob_dist'][x])
            print(
                f"{annotation['provenance']} -- {annotation['name']}: "
                f"{annotation['original_annotation']} -> {predicted_annotation} "
                f"({math.exp(annotation['predicted_annotation_logprob_dist'][predicted_annotation]) * 100:.1f}%)")
        evaluator.add_sample(
            ground_truth=annotation['original_annotation'],
            predicted_dist=annotation['predicted_annotation_logprob_dist'])

    with open(result_path, 'w') as f:
        f.write(json.dumps(evaluator.metrics(), indent=2, sort_keys=True))

def _load_data_from_sample(hyperparameters: Dict[str, Any], metadata: Dict[str, Any],
                           raw_sample: Dict[str, Any], result_holder: Dict[str, Any],
                           is_train: bool = True) -> bool:
    keep_sample = super(Path2Metric, Path2Metric)._load_data_from_sample(
        hyperparameters, metadata, raw_sample, result_holder, is_train)
    if not keep_sample:
        return False

    target_class = []
    for node_idx, annotation_data in raw_sample['supernodes'].items():
        annotation = annotation_data['annotation']
        if is_train and ignore_type_annotation(annotation):
            continue
        target_class.append(annotation)

    result_holder['target_type'] = target_class
    return len(target_class) > 0

def representation_iter():
    data_chunk_iterator = (r.read_by_file_suffix() for r in data_paths)
    with self.__model.sess.as_default():
        for raw_data_chunk in data_chunk_iterator:
            for raw_sample in raw_data_chunk:
                loaded_sample = {}
                use_example = self.__model._load_data_from_sample(
                    self.__model.hyperparameters,
                    self.__model.metadata,
                    raw_sample=raw_sample,
                    result_holder=loaded_sample,
                    is_train=False)
                if not use_example:
                    continue

                _, fetches = self.__model._run_epoch_in_batches(
                    loaded_sample, '(indexing)', is_train=False, quiet=True,
                    additional_fetch_dict={
                        'target_representations': self.__model.ops['target_representations']
                    })
                target_representations = fetches['target_representations']

                idx = 0
                for node_idx, annotation_data in raw_sample['supernodes'].items():
                    node_idx = int(node_idx)
                    if 'ignored_supernodes' in loaded_sample and node_idx in loaded_sample['ignored_supernodes']:
                        continue
                    annotation = annotation_data['annotation']
                    if ignore_type_annotation(annotation):
                        idx += 1
                        continue
                    yield target_representations[idx], annotation
                    idx += 1

def _load_data_from_sample(hyperparameters: Dict[str, Any], metadata: Dict[str, Any],
                           raw_sample: Dict[str, Any], result_holder: Dict[str, Any],
                           is_train: bool = True) -> bool:
    keep_sample = super(Sequence2HybridMetric, Sequence2HybridMetric)._load_data_from_sample(
        hyperparameters, metadata, raw_sample, result_holder, is_train)
    if not keep_sample:
        return False

    token_node_idxs = set(raw_sample['token-sequence'])
    node_idx_to_supernode_idx = {}  # type: Dict[int, int]
    for from_idx, to_idxs in raw_sample['edges']['OCCURRENCE_OF'].items():
        from_idx = int(from_idx)
        if from_idx not in token_node_idxs:
            # Some supernodes do not have an associated token. Such nodes are attributes
            if str(from_idx) in raw_sample['edges']['CHILD']:
                right_token_idx = max(raw_sample['edges']['CHILD'][str(from_idx)])
                assert right_token_idx in token_node_idxs
                from_idx = right_token_idx
            else:
                continue
        for to_idx in to_idxs:
            node_idx_to_supernode_idx[from_idx] = to_idx

    supernodes_with_related_nodes = set(node_idx_to_supernode_idx.values())

    variable_types = []  # type: List[str]
    variable_type_idxs = []  # type: List[int]
    ignored_supernodes = set()
    supernode_idxs_to_annotated_variable_idx = {}  # type: Dict[int, int]
    for node_idx, supernode_data in raw_sample['supernodes'].items():
        node_idx = int(node_idx)
        annotation = supernode_data['annotation']
        if ignore_type_annotation(annotation) and is_train:
            ignored_supernodes.add(node_idx)
            continue
        if node_idx not in supernodes_with_related_nodes:
            ignored_supernodes.add(node_idx)
            continue

        variable_idx = len(supernode_idxs_to_annotated_variable_idx)
        variable_types.append(annotation)
        variable_type_idxs.append(
            TypeClassificationModel._get_idx_for_type(annotation, metadata, hyperparameters))
        supernode_idxs_to_annotated_variable_idx[node_idx] = variable_idx

    if len(variable_types) == 0:
        return False

    token_idx, variable_idx = [], []

    def create_token_sequence():
        for i, node_idx in enumerate(raw_sample['token-sequence']):
            supernode_idx = node_idx_to_supernode_idx.get(node_idx)
            if supernode_idx is not None:
                annotated_variable_idxs = supernode_idxs_to_annotated_variable_idx.get(supernode_idx)
                if annotated_variable_idxs is not None:
                    token_idx.append(i)
                    variable_idx.append(annotated_variable_idxs)
            yield raw_sample['nodes'][node_idx]

    token_sequence = list(create_token_sequence())
    if len(token_sequence) > hyperparameters['max_seq_len']:
        return False

    # Did we see at least one token per variable?
    assert len(np.unique(variable_idx)) == len(variable_types)

    TokenEmbedder.load_data_from_sample('token', metadata, token_sequence, result_holder,
                                        hyperparameters, is_train)
    result_holder['sequence_length'] = len(token_sequence)
    result_holder['variable_token_idxs'] = np.array(token_idx, dtype=np.uint32)
    result_holder['variable_idxs'] = np.array(variable_idx, dtype=np.uint32)
    result_holder['target_type'] = variable_types
    result_holder['variable_target_class'] = np.array(variable_type_idxs, dtype=np.uint32)
    result_holder['ignored_supernodes'] = ignored_supernodes
    return keep_sample

def ignore_annotation(annotation_str: Optional[str]) -> bool:
    if annotation_str is None:
        return False
    return ignore_type_annotation(annotation_str)

def run(arguments):
    azure_info_path = arguments.get('--azure-info', None)
    predictions_jsonl = RichPath.create(arguments['PREDICTIONS_PATH'], azure_info_path)
    type_lattice_path = RichPath.create(arguments['TYPE_LATTICE_PATH'], azure_info_path)
    alias_metadata_path = RichPath.create(arguments['ALIAS_METADATA'], azure_info_path)

    type_lattice = TypeLattice(type_lattice_path, 'typing.Any', alias_metadata_path)

    exact_match_metric = MetricForPrecRec('Exact Match')
    up_to_parametric_match_metric = MetricForPrecRec('Match Up to Parametric Type')
    type_correct_metric = MetricForPrecRec('Type Neutral')

    for prediction in predictions_jsonl.read_as_jsonl():
        ground_truth = prediction['original_annotation']
        if ignore_type_annotation(ground_truth):
            continue
        top_prediction, prediction_logprob = prediction["predicted_annotation_logprob_dist"][0]

        is_exact_match = type_lattice.are_same_type(ground_truth, top_prediction)
        exact_match_metric.add(is_exact_match, prediction_logprob)

        correct_up_to_parametric = is_exact_match or type_lattice.are_same_type(
            ground_truth.split("[")[0], top_prediction.split("[")[0])
        up_to_parametric_match_metric.add(correct_up_to_parametric, prediction_logprob)

        if is_exact_match:
            type_correct_metric.add(True, prediction_logprob)
        elif ground_truth in type_lattice and top_prediction in type_lattice:
            ground_truth_node_idx = type_lattice.id_of(ground_truth)
            predicted_node_idx = type_lattice.id_of(top_prediction)
            intersection_nodes_idx = type_lattice.intersect(ground_truth_node_idx, predicted_node_idx)
            is_ground_subtype_of_predicted = ground_truth_node_idx in intersection_nodes_idx
            type_correct_metric.add(is_ground_subtype_of_predicted, prediction_logprob)

    pr_curves = [
        exact_match_metric.get_pr_curve() + ('r--', exact_match_metric),
        up_to_parametric_match_metric.get_pr_curve() + ('b:', up_to_parametric_match_metric),
        type_correct_metric.get_pr_curve() + ('k-', type_correct_metric)
    ]

    fig = plt.figure(figsize=(5.5, 2.5))
    ax = fig.add_subplot(111)
    for name, precision, recall, _, style, metric in pr_curves:
        print(name, metric.num_elements, recall, precision)
        print(f'{name}: {precision[0]:%}')
        ax.plot(recall, precision, style, label=name, linewidth=2)
    plt.grid()
    plt.xlim([0, 1.005])
    plt.ylim([0, 1.005])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    plt.tight_layout()
    plt.savefig('test.pdf', dpi=300)

def compute(predictions_path: RichPath, type_lattice_path: RichPath, alias_metadata_path: RichPath,
            json_preds: str, top_n: int = 10):
    with open(json_preds) as f:
        json_data = json.load(f)['per_type_stats']

    type_lattice = TypeLattice(type_lattice_path, 'typing.Any', alias_metadata_path)
    data = predictions_path.read_as_jsonl()

    total_per_kind = defaultdict(int)
    correct_per_kind = defaultdict(int)
    up_to_parameteric_per_kind = defaultdict(int)
    # type_consistency_per_kind = defaultdict(int)
    # total_per_kind_for_consistency = defaultdict(int)

    common_tr = 100  # occurrence-count threshold separating common from rare types
    corr_exact_per_kind = defaultdict(lambda: defaultdict(int))
    corr_param_per_kind = defaultdict(lambda: defaultdict(int))
    corr_exact = defaultdict(int)
    corr_param = defaultdict(int)

    # For recall and precision calculation
    pred_annotation = []
    true_annotation = []
    pred_per_type = defaultdict(list)
    true_per_type = defaultdict(list)

    for prediction in data:
        annotation_type = prediction['annotation_type']
        original_annotation = prediction['original_annotation']
        if (ignore_type_annotation(original_annotation)
                or annotation_type == 'variable' or annotation_type == 'imported'):
            continue

        true_annotation.append(original_annotation)
        true_per_type[annotation_type].append(original_annotation)
        total_per_kind[annotation_type] += 1
        # top_predicted = prediction['predicted_annotation_logprob_dist'][0][0]

        is_exact_match = False
        is_accurate_utpt = False
        for t, s in prediction['predicted_annotation_logprob_dist'][:top_n]:
            is_exact_match = type_lattice.are_same_type(original_annotation, t)
            if is_exact_match:
                correct_per_kind[annotation_type] += 1
                corr_exact['all'] += 1
                if json_data[original_annotation]['count'] > common_tr:
                    corr_exact_per_kind[annotation_type]['corr_common'] += 1
                    corr_exact['corr_common'] += 1
                else:
                    corr_exact_per_kind[annotation_type]['corr_rare'] += 1
                    corr_exact['corr_rare'] += 1
                pred_annotation.append(t)
                pred_per_type[annotation_type].append(t)
                break

        for t, s in prediction['predicted_annotation_logprob_dist'][:top_n]:
            is_accurate_utpt = type_lattice.are_same_type(
                original_annotation.split("[")[0], t.split("[")[0])
            if is_accurate_utpt:
                up_to_parameteric_per_kind[annotation_type] += 1
                corr_param['all'] += 1
                if json_data[original_annotation]['count'] > common_tr:
                    corr_param_per_kind[annotation_type]['corr_common'] += 1
                    corr_param['corr_common'] += 1
                else:
                    corr_param_per_kind[annotation_type]['corr_rare'] += 1
                    corr_param['corr_rare'] += 1
                break

        if not is_exact_match:
            if json_data[original_annotation]['count'] > common_tr:
                corr_exact_per_kind[annotation_type]['incorr_common'] += 1
                corr_exact['incorr_common'] += 1
            else:
                corr_exact_per_kind[annotation_type]['incorr_rare'] += 1
                corr_exact['incorr_rare'] += 1
            pred_annotation.append(prediction['predicted_annotation_logprob_dist'][0][0])
            pred_per_type[annotation_type].append(
                prediction['predicted_annotation_logprob_dist'][0][0])

        if not is_accurate_utpt:
            if json_data[original_annotation]['count'] > common_tr:
                corr_param_per_kind[annotation_type]['incorr_common'] += 1
                corr_param['incorr_common'] += 1
            else:
                corr_param_per_kind[annotation_type]['incorr_rare'] += 1
                corr_param['incorr_rare'] += 1

        # if is_exact_match:
        #     type_consistency_per_kind[annotation_type] += 1
        #     total_per_kind_for_consistency[annotation_type] += 1
        # elif original_annotation in type_lattice and top_predicted in type_lattice:
        #     # Type Consistency
        #     ground_truth_node_idx = type_lattice.id_of(original_annotation)
        #     predicted_node_idx = type_lattice.id_of(top_predicted)
        #     intersection_nodes_idx = type_lattice.intersect(ground_truth_node_idx, predicted_node_idx)
        #     is_ground_subtype_of_predicted = ground_truth_node_idx in intersection_nodes_idx
        #     total_per_kind_for_consistency[annotation_type] += 1
        #     if is_ground_subtype_of_predicted:
        #         type_consistency_per_kind[annotation_type] += 1

    print('== Exact Match')
    for annot_type in total_per_kind:
        try:
            print(
                f'{annot_type}: {correct_per_kind[annot_type] / total_per_kind[annot_type] * 100.0 :.2f} '
                f'({correct_per_kind[annot_type]}/{total_per_kind[annot_type]})')
            print(
                f"Common - {annot_type}: {corr_exact_per_kind[annot_type]['corr_common'] / (corr_exact_per_kind[annot_type]['corr_common'] + corr_exact_per_kind[annot_type]['incorr_common']) * 100.0 :.2f}")
            print(
                f"Rare - {annot_type}: {corr_exact_per_kind[annot_type]['corr_rare'] / (corr_exact_per_kind[annot_type]['corr_rare'] + corr_exact_per_kind[annot_type]['incorr_rare']) * 100.0 :.2f}")
            r = classification_report(true_per_type[annot_type], pred_per_type[annot_type],
                                      output_dict=True)
            print(
                f"{annot_type}: F1: {r['weighted avg']['f1-score'] * 100:.2f} "
                f"R: {r['weighted avg']['recall'] * 100:.2f} "
                f"P: {r['weighted avg']['precision'] * 100:.2f}")
            print("******************************")
        except ZeroDivisionError:
            pass

    print('== Up to Parametric')
    for annot_type in total_per_kind:
        try:
            print(
                f'{annot_type}: {up_to_parameteric_per_kind[annot_type] / total_per_kind[annot_type] * 100.0 :.2f} '
                f'({up_to_parameteric_per_kind[annot_type]}/{total_per_kind[annot_type]})')
            print(
                f"Common - {annot_type}: {corr_param_per_kind[annot_type]['corr_common'] / (corr_param_per_kind[annot_type]['corr_common'] + corr_param_per_kind[annot_type]['incorr_common']) * 100.0 :.2f}")
            print(
                f"Rare - {annot_type}: {corr_param_per_kind[annot_type]['corr_rare'] / (corr_param_per_kind[annot_type]['corr_rare'] + corr_param_per_kind[annot_type]['incorr_rare']) * 100.0 :.2f}")
            print("******************************")
        except ZeroDivisionError:
            pass

    r = classification_report(true_annotation, pred_annotation, output_dict=True)
    print("Precision: %.2f" % (r['weighted avg']['precision'] * 100))
    print("Recall: %.2f" % (r['weighted avg']['recall'] * 100))
    print("F1-score: %.2f" % (r['weighted avg']['f1-score'] * 100))
    print("******************************")
    print(
        f"Exact - All: {corr_exact['all'] / len(true_annotation) * 100.0:.2f} "
        f"common: {corr_exact['corr_common'] / (corr_exact['corr_common'] + corr_exact['incorr_common']) * 100.0:.2f} "
        f"rare: {corr_exact['corr_rare'] / (corr_exact['corr_rare'] + corr_exact['incorr_rare']) * 100.0:.2f}")
    print(
        f"Parametric - All: {corr_param['all'] / len(true_annotation) * 100.0:.2f} "
        f"common: {corr_param['corr_common'] / (corr_param['corr_common'] + corr_param['incorr_common']) * 100.0:.2f} "
        f"rare: {corr_param['corr_rare'] / (corr_param['corr_rare'] + corr_param['incorr_rare']) * 100.0:.2f}")