Пример #1
0
    def _eval_pas(self,
                  arguments_set,
                  dataset: PASDataset,
                  corpus: str,
                  suffix: str = '') -> Dict[str, ScoreResult]:
        """
        Write predictions for ``corpus`` and score them once per PAS target.

        :param arguments_set: predicted arguments handed to the KNP writer as-is
        :param dataset: dataset supplying gold documents and scorer settings
        :param corpus: corpus name, used as the prefix of every output file
        :param suffix: optional string appended to every output name
        :return: A dict mapping each entry of ``self.pas_targets`` to its score result
        """
        save_dir = self.save_dir
        knp_writer = PredictionKNPWriter(dataset,
                                         self.logger,
                                         use_knp_overt=(not self.predict_overt))
        predicted_documents = knp_writer.write(arguments_set,
                                               save_dir / f'{corpus}_out{suffix}',
                                               add_pas_tag=False)

        results = {}
        for pas_target in self.pas_targets:
            scorer = Scorer(documents_pred=predicted_documents,
                            documents_gold=dataset.gold_documents,
                            target_cases=dataset.target_cases,
                            target_exophors=dataset.target_exophors,
                            coreference=dataset.coreference,
                            bridging=dataset.bridging,
                            pas_target=pas_target) if False else Scorer(
                                predicted_documents,
                                dataset.gold_documents,
                                target_cases=dataset.target_cases,
                                target_exophors=dataset.target_exophors,
                                coreference=dataset.coreference,
                                bridging=dataset.bridging,
                                pas_target=pas_target)
            score = scorer.run()

            # Output stem: <corpus>[_<pas_target>]<suffix>; the infix is
            # omitted when the target is empty.
            if pas_target:
                infix = f'_{pas_target}'
            else:
                infix = ''
            stem = corpus + infix + suffix

            scorer.write_html(save_dir / f'{stem}.html')
            score.export_txt(save_dir / f'{stem}.txt')
            score.export_csv(save_dir / f'{stem}.csv')

            results[pas_target] = score

        return results
Пример #2
0
def main(config, args):
    """
    Analyze the requested input and emit the predictions.

    The input is taken from ``args.input`` if given, else from the directory
    ``args.knp_dir``, else from everything readable on stdin. Predictions are
    written to ``args.export_dir`` if given, to stdout when ``args.tab`` is
    ``True``, and nowhere otherwise. When ``args.tab`` is ``False`` a tree is
    drawn on stdout for every sentence of every predicted document.

    :param config: configuration object passed straight to ``Analyzer``
    :param args: parsed command-line options (``remote_knp``, ``input``,
        ``knp_dir``, ``export_dir``, ``tab``, ``skip_untagged``, ``rel_only``)
    """
    analyzer = Analyzer(config, remote_knp=args.remote_knp)

    source = args.input
    if source is None:
        if args.knp_dir is not None:
            source = Path(args.knp_dir)
        else:
            source = ''.join(sys.stdin.readlines())

    arguments_set, dataset = analyzer.analyze(source)

    knp_writer = PredictionKNPWriter(dataset, logger)

    destination = None
    if args.export_dir is not None:
        destination = Path(args.export_dir)
    elif args.tab is True:
        destination = sys.stdout

    documents_pred: List[Document] = knp_writer.write(
        arguments_set,
        destination,
        skip_untagged=args.skip_untagged,
        add_pas_tag=(not args.rel_only))

    # Trees are only drawn when tab output was explicitly switched off.
    if args.tab is not False:
        return
    for predicted in documents_pred:
        for sid in predicted.sid2sentence.keys():
            draw_tree(predicted, sid, dataset.target_cases,
                      dataset.bridging, dataset.coreference, sys.stdout)
Пример #3
0
    def _eval_pas(self, arguments_set, dataset: PASDataset, corpus: str, suffix: str = ''):
        """
        Write predictions for ``corpus``, score them per PAS target and collect metrics.

        :param arguments_set: predicted arguments handed to the KNP writer as-is
        :param dataset: dataset supplying gold documents and scorer settings
        :param corpus: corpus name; ``'kc'`` selects ``dataset.joined_documents`` as gold
        :param suffix: optional string appended to every output name
        :return: A dict mapping metric names to values; names containing
            ``case_analysis`` or ``zero_anaphora`` are prefixed with the PAS
            target when that target is non-empty
        """
        knp_writer = PredictionKNPWriter(dataset,
                                         self.logger,
                                         use_knp_overt=(not self.predict_overt))
        documents_pred = knp_writer.write(arguments_set,
                                          self.save_dir / f'{corpus}_out{suffix}')
        if corpus == 'kc':
            documents_gold = dataset.joined_documents
        else:
            documents_gold = dataset.documents

        collected = {}
        for pas_target in self.pas_targets:
            scorer = Scorer(documents_pred, documents_gold,
                            target_cases=dataset.target_cases,
                            target_exophors=dataset.target_exophors,
                            coreference=dataset.coreference,
                            bridging=dataset.bridging,
                            pas_target=pas_target)

            # Output stem: <corpus>[_<pas_target>]<suffix>
            stem = corpus + (f'_{pas_target}' if pas_target else '') + suffix
            # The HTML report is skipped only when evaluating the 'test' target.
            if self.target != 'test':
                scorer.write_html(self.save_dir / f'{stem}.html')
            scorer.export_txt(self.save_dir / f'{stem}.txt')
            scorer.export_csv(self.save_dir / f'{stem}.csv')

            values = self._eval_metrics(scorer.result_dict())
            for metric_fn, value in zip(self.metrics, values):
                key = metric_fn.__name__
                depends_on_target = 'case_analysis' in key or 'zero_anaphora' in key
                if depends_on_target and pas_target:
                    key = f'{pas_target}_{key}'
                collected[key] = value

        return collected
Пример #4
0
    def _valid_epoch(self, data_loader, corpus):
        """
        Run one validation pass over ``data_loader``.

        :param data_loader: loader yielding dict batches of tensors; its
            ``dataset`` and ``batch_size`` attributes are read as well
        :param corpus: corpus name; ``'commonsense'`` skips argument decoding
            and scoring and reports an F1 value instead
        :return: A log dict holding ``'loss'`` and either ``'result'`` (the
            scorer output) or ``'f1'`` (for the commonsense corpus)
        """
        self.model.eval()
        loss_sum = 0
        arguments_set: List[List[List[int]]] = []
        # NOTE(review): nothing in this method ever appends to contingency_set,
        # so _eval_commonsense receives an empty list - confirm this is intended.
        contingency_set: List[int] = []
        is_commonsense = (corpus == 'commonsense')
        with torch.no_grad():
            for step, batch in enumerate(data_loader):
                on_device = {}
                for name, tensor in batch.items():
                    on_device[name] = tensor.to(self.device, non_blocking=True)

                loss, *output = self.model(**on_device)

                # A non-scalar loss is averaged down to a single value.
                if loss.dim() > 0:
                    loss = loss.mean()
                pas_scores = output[0]  # (b, seq, case, seq)

                if not is_commonsense:
                    arguments_set.extend(torch.argmax(pas_scores, dim=3).tolist())  # (b, seq, case)

                # Weight the batch loss by the batch size so the final division
                # by the dataset length gives a per-example value.
                loss_sum += loss.item() * pas_scores.size(0)

                if step % self.log_step == 0:
                    seen = step * data_loader.batch_size
                    percent = 100.0 * step / len(data_loader)
                    clock = datetime.datetime.now().strftime('%H:%M:%S')
                    self.logger.info('Validation [{}/{} ({:.0f}%)] Time: {}'.format(
                        seen, len(data_loader.dataset), percent, clock))

        mean_loss = loss_sum / len(data_loader.dataset)
        log = {'loss': mean_loss}
        self.writer.add_scalar(f'loss/{corpus}', mean_loss)

        if is_commonsense:
            log['f1'] = self._eval_commonsense(contingency_set)
            return log

        dataset = data_loader.dataset
        knp_writer = PredictionKNPWriter(dataset, self.logger)
        documents_pred = knp_writer.write(arguments_set, None, add_pas_tag=False)
        # Maps the dataset's tuple of PAS targets to the label the scorer expects.
        label_by_targets = {tuple(): '', ('pred',): 'pred', ('noun',): 'noun', ('pred', 'noun'): 'all'}

        scorer = Scorer(documents_pred, dataset.gold_documents,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=label_by_targets[tuple(dataset.pas_targets)])
        log['result'] = scorer.run()

        return log
def analyze_raw_data_from_client(knp_result: str):
    """
    Analyze a KNP result string and return the prediction in KNP format.

    A timestamped directory under ``log`` is passed to the analyzer as
    ``knp_dir``, and the generated text is also saved there as ``pas.knp``.

    :param knp_result: KNP output received from the client
    :return: The text produced by the prediction writer
    """
    timestamp = datetime.now().strftime(r'%Y%m%d_%H%M%S')
    log_dir = Path('log') / timestamp
    arguments_set, dataset = analyzer.analyze_from_knp(knp_result, knp_dir=log_dir)

    knp_writer = PredictionKNPWriter(dataset, logger)
    with io.StringIO() as buffer:
        # The returned documents are not needed; only the written text is kept.
        knp_writer.write(arguments_set, buffer, skip_untagged=False)
        pas_knp = buffer.getvalue()

    with (log_dir / 'pas.knp').open('wt') as f:
        f.write(pas_knp)
    return pas_knp
Пример #6
0
def api():
    """
    Analyze the ``input`` query parameter and respond with JSON.

    The prediction is saved as ``pas.knp`` in a timestamped directory under
    ``log``. The response carries the sanitized input plus two outputs: an
    HTML rendering of the tree of every sentence in the first predicted
    document, and the HTML-escaped KNP text with newlines turned into ``<br>``.

    :return: The response object built by ``make_response(jsonify(...))``
    """
    input_string = request.args['input']
    timestamp = datetime.now().strftime(r'%Y%m%d_%H%M%S')
    log_dir = Path('log') / timestamp

    arguments_set, dataset = analyzer.analyze(input_string, knp_dir=log_dir)

    knp_writer = PredictionKNPWriter(dataset, logger)
    with io.StringIO() as knp_buffer:
        documents = knp_writer.write(arguments_set, knp_buffer, skip_untagged=False)
        # Only the first predicted document is rendered.
        document: Document = documents[0]
        knp_result: str = knp_buffer.getvalue()
    with (log_dir / 'pas.knp').open('wt') as f:
        f.write(knp_result)

    style_block = textwrap.dedent('''
        <style type="text/css">
        pre {
            font-family: "MS ゴシック", "Osaka-Mono", "Osaka-等幅", "さざなみゴシック", "Sazanami Gothic", sans-serif;
            white-space: pre;
        }
        </style>
        ''')
    fragments = [style_block, '<pre>\n']
    for sid in document.sid2sentence.keys():
        with io.StringIO() as tree_buffer:
            draw_tree(document, sid, dataset.target_cases, dataset.bridging, dataset.coreference, tree_buffer,
                      html=True)
            tree_string = tree_buffer.getvalue()
        logger.info('output:\n' + tree_string)
        fragments.append(tree_string)
    fragments.append('</pre>\n')
    html_string = ''.join(fragments)

    payload = {
        "input": analyzer.sanitize_string(input_string),
        "output": [
            {'result': html_string},
            {'results in a KNP format': html.escape(knp_result).replace('\n', '<br>')}
        ]
    }
    return make_response(jsonify(payload))
Пример #7
0
    def _valid_epoch(self, data_loader, corpus):
        """
        Run one validation pass over ``data_loader``.

        :param data_loader: loader yielding dict batches of tensors; its
            ``dataset`` and ``batch_size`` attributes are read as well
        :param corpus: corpus name; ``'commonsense'`` skips argument decoding
            and scoring, ``'kc'`` selects ``dataset.joined_documents`` as gold
        :return: A log dict holding ``'loss'`` and either one entry per
            function in ``self.metrics`` (keyed by its ``__name__``) or
            ``'f1'`` (for the commonsense corpus)
        """
        self.model.eval()
        loss_sum = 0
        arguments_set: List[List[List[int]]] = []
        contingency_set: List[int] = []
        is_commonsense = (corpus == 'commonsense')
        with torch.no_grad():
            for step, batch in enumerate(data_loader):
                on_device = {}
                for name, tensor in batch.items():
                    on_device[name] = tensor.to(self.device, non_blocking=True)

                loss, *output = self.model(**on_device)

                # A non-scalar loss is averaged down to a single value.
                if loss.dim() > 0:
                    loss = loss.mean()

                # Architectures matching the pattern keep their scores in the
                # last output; every other one keeps them in the first.
                arch_type = self.config['arch']['type']
                if re.match(r'.*(CaseInteraction|Refinement|Duplicate).*Model', arch_type):
                    pas_scores = output[-1]  # (b, seq, case, seq)
                else:
                    pas_scores = output[0]  # (b, seq, case, seq)
                    if arch_type == 'CommonsenseModel':
                        # Second output thresholded at 0.5 into 0/1 values.
                        contingency_set.extend(output[1].gt(0.5).int().tolist())

                if not is_commonsense:
                    arguments_set.extend(torch.argmax(pas_scores, dim=3).tolist())  # (b, seq, case)

                # Weight the batch loss by the batch size so the final division
                # by the dataset length gives a per-example value.
                loss_sum += loss.item() * pas_scores.size(0)

                if step % self.log_step == 0:
                    seen = step * data_loader.batch_size
                    percent = 100.0 * step / len(data_loader)
                    clock = datetime.datetime.now().strftime('%H:%M:%S')
                    self.logger.info(
                        'Validation [{}/{} ({:.0f}%)] Time: {}'.format(
                            seen, len(data_loader.dataset), percent, clock))

        mean_loss = loss_sum / len(data_loader.dataset)
        log = {'loss': mean_loss}
        self.writer.add_scalar(f'loss/{corpus}', mean_loss)

        if is_commonsense:
            log['f1'] = self._eval_commonsense(contingency_set)
            return log

        dataset = data_loader.dataset
        knp_writer = PredictionKNPWriter(dataset, self.logger)
        documents_pred = knp_writer.write(arguments_set, None)
        if corpus == 'kc':
            documents_gold = dataset.joined_documents
        else:
            documents_gold = dataset.documents
        # Maps the dataset's tuple of PAS targets to the label the scorer expects.
        label_by_targets = {
            tuple(): '',
            ('pred', ): 'pred',
            ('noun', ): 'noun',
            ('pred', 'noun'): 'all'
        }

        scorer = Scorer(documents_pred,
                        documents_gold,
                        target_cases=dataset.target_cases,
                        target_exophors=dataset.target_exophors,
                        coreference=dataset.coreference,
                        bridging=dataset.bridging,
                        pas_target=label_by_targets[tuple(dataset.pas_targets)])

        val_metrics = self._eval_metrics(scorer.result_dict(), corpus)

        metric_names = [met.__name__ for met in self.metrics]
        for metric_name, value in zip(metric_names, val_metrics):
            log[metric_name] = value

        return log