Exemplo n.º 1
0
    def __init__(self,
                 types_path: str,
                 tokenizer: BertTokenizer,
                 neg_entity_count: int = None,
                 neg_rel_count: int = None,
                 max_span_size: int = None,
                 logger: Logger = None,
                 **kwargs):
        """Load entity/relation type definitions and store reader settings.

        The JSON file at ``types_path`` must contain an ``'entities'`` and a
        ``'relations'`` mapping. Every entry needs the keys ``'short'`` and
        ``'verbose'``; relation entries additionally need ``'symmetric'``.
        Index 0 is reserved for the implicit ``'None'`` entity/relation type;
        the types from the file are numbered from 1 in file order.

        Args:
            types_path: Path of the JSON file with the type definitions.
            tokenizer: Tokenizer kept on the instance; its ``vocab_size`` is
                read and stored as well.
            neg_entity_count: Stored unchanged on the instance.
            neg_rel_count: Stored unchanged on the instance.
            max_span_size: Stored unchanged on the instance.
            logger: Stored unchanged on the instance.
            **kwargs: Accepted but not used by this initializer.

        Raises:
            OSError: If ``types_path`` cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a required key is missing from the definitions.
        """
        # Read inside a context manager so the file handle is always closed,
        # even when parsing fails (the bare open() call leaked it).
        with open(types_path) as types_file:
            # entity + relation types, kept in file order
            types = json.load(types_file, object_pairs_hook=OrderedDict)

        self._entity_types = OrderedDict()
        self._idx2entity_type = OrderedDict()
        self._relation_types = OrderedDict()
        self._idx2relation_type = OrderedDict()

        # entities
        # add 'None' entity type at index 0
        none_entity_type = EntityType('None', 0, 'None', 'No Entity')
        self._entity_types['None'] = none_entity_type
        self._idx2entity_type[0] = none_entity_type

        # specified entity types, numbered from 1 (0 is taken by 'None')
        for index, (key, v) in enumerate(types['entities'].items(), start=1):
            entity_type = EntityType(key, index, v['short'], v['verbose'])
            self._entity_types[key] = entity_type
            self._idx2entity_type[index] = entity_type

        # relations
        # add 'None' relation type at index 0
        none_relation_type = RelationType('None', 0, 'None', 'No Relation')
        self._relation_types['None'] = none_relation_type
        self._idx2relation_type[0] = none_relation_type

        # specified relation types, numbered from 1 (0 is taken by 'None')
        for index, (key, v) in enumerate(types['relations'].items(), start=1):
            relation_type = RelationType(key, index, v['short'], v['verbose'],
                                         v['symmetric'])
            self._relation_types[key] = relation_type
            self._idx2relation_type[index] = relation_type

        self._neg_entity_count = neg_entity_count
        self._neg_rel_count = neg_rel_count
        self._max_span_size = max_span_size

        self._datasets = dict()

        self._tokenizer = tokenizer
        self._logger = logger

        self._vocabulary_size = tokenizer.vocab_size
Exemplo n.º 2
0
    def __init__(self, dataset: Dataset, input_reader: BaseInputReader,
                 text_encoder: BertTokenizer, rel_filter_threshold: float,
                 no_overlapping: bool, predictions_path: str,
                 examples_path: str, example_count: int):
        """Store the evaluation settings and convert the ground truth.

        Every argument is kept unchanged on the instance under a
        ``_``-prefixed attribute of the same name. The ground-truth and
        prediction lists start out empty, after which ``_convert_gt`` is
        called with the documents of ``dataset``.
        """
        # data source and helpers
        self._dataset = dataset
        self._input_reader = input_reader
        self._text_encoder = text_encoder

        # evaluation options
        self._rel_filter_threshold = rel_filter_threshold
        self._no_overlapping = no_overlapping

        # paths and example settings
        self._predictions_path = predictions_path
        self._examples_path = examples_path
        self._example_count = example_count

        # (ground truth, prediction) lists for entities and relations
        self._gt_entities, self._pred_entities = [], []
        self._gt_relations, self._pred_relations = [], []

        # for span only evaluation
        self._pseudo_entity_type = EntityType('Entity', 1, 'Entity', 'Entity')

        # must run last: all attributes above are already set at this point
        self._convert_gt(self._dataset.documents)
Exemplo n.º 3
0
    def __init__(self, dataset: Dataset, input_reader: JsonInputReader, text_encoder: BertTokenizer,
                 rel_filter_threshold: float, example_count: int, example_path: str,
                 epoch: int, dataset_label: str):
        """Store the evaluation settings and convert the ground truth.

        The arguments are kept unchanged on the instance (note that
        ``example_path`` is stored as ``_examples_path``). The ground-truth
        and prediction lists start out empty, after which ``_convert_gt`` is
        called with the documents of ``dataset``.
        """
        # data source and helpers
        self._dataset = dataset
        self._input_reader = input_reader
        self._text_encoder = text_encoder

        # evaluation options
        self._rel_filter_threshold = rel_filter_threshold

        # run identification
        self._epoch = epoch
        self._dataset_label = dataset_label

        # example settings
        self._examples_path = example_path
        self._example_count = example_count

        # (ground truth, prediction) lists for entities and relations
        self._gt_entities, self._pred_entities = [], []
        self._gt_relations, self._pred_relations = [], []

        # for span only evaluation
        self._pseudo_entity_type = EntityType('Entity', 1, 'Entity', 'Entity')

        # must run last: all attributes above are already set at this point
        self._convert_gt(self._dataset.documents)
Exemplo n.º 4
0
    def __init__(self, pred_dataset: Dataset, dataset: Dataset):
        """Set up empty result lists and convert both datasets.

        ``_convert_gt`` is called twice: first with the documents of
        ``dataset`` (default arguments), then with the documents of
        ``pred_dataset`` and ``val=False``.
        """
        # NOTE(review): no parent initializer is invoked here; an earlier
        # super(CustomEvaluator, self).__init__(...) call had been commented
        # out. Confirm that no attribute set by a base class is required.
        self._pseudo_entity_type = EntityType('Entity', 1, 'Entity', 'Entity')

        # lists filled during conversion (entities, relations)
        self._gt_entities, self._gt_relations = [], []
        self._pred_entities, self._pred_relations = [], []

        # `dataset` first, then `pred_dataset` with val=False
        self._convert_gt(dataset.documents)
        self._convert_gt(pred_dataset.documents, val=False)