Example #1
def _alignment_module(op, hidden_size):
    module = _utils.get_module(AlignmentNetwork,
                               op,
                               hidden_size=hidden_size,
                               required=True)
    module.expect_signature("[AxBxC, AxDxC] -> [AxBxD]")
    return module
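
A minimal usage sketch for the helper above; the "decomposable" style name and the dimensions are illustrative assumptions, not taken from the code:

# Hypothetical usage: resolve an alignment module by style name. Any style
# AlignmentNetwork supports would do; "decomposable" is assumed here.
align = _alignment_module("decomposable", hidden_size=300)
# Per the expected signature, inputs of shapes [batch x len1 x dim] and
# [batch x len2 x dim] yield alignment scores of shape [batch x len1 x len2].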
Example #2
    def _create(cls, arg, **kwargs):
        r"""Create a word aggregator object.

        Args:
            arg (str or :mod:`deepmatcher.word_aggregators` or callable):
                Same as the `word_aggregator` argument to the constructor of
                :class:`AttrSummarizer`.
            **kwargs:
                Keyword arguments to the constructor of the WordAggregator sub-class.
                For details on what these can be, please refer to the documentation of the
                sub-classes in :mod:`deepmatcher.word_aggregators`.

        """
        assert arg is not None
        if isinstance(arg, six.string_types):
            parts = arg.split("-")
            if parts[-1] == "pool" and dm.word_aggregators.Pool.supports_style(
                "-".join(parts[:-1])
            ):
                seq = []
                seq.append(dm.modules.Lambda(lambda x1, x2: x1))  # Ignore the context.
                seq.append(dm.word_aggregators.Pool(style="-".join(parts[:-1])))

                # Make lazy module.
                wa = LazyModuleFn(lambda: dm.modules.MultiSequential(*seq))
            elif arg == "attention-with-rnn":
                wa = dm.word_aggregators.AttentionWithRNN(**kwargs)
            else:
                raise ValueError("Unknown Word Aggregator name.")
        else:
            wa = _utils.get_module(WordAggregator, arg)

        wa.expect_signature("[AxBxC] -> [AxD]")
        return wa
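
A hedged sketch of the argument forms this factory accepts, mirroring the branches above; "max" is an assumed Pool style and my_custom_module is a hypothetical module instance:

# Hypothetical calls; _create is taken to be a classmethod of WordAggregator.
wa = WordAggregator._create("max-pool")  # any Pool style suffixed with "-pool"
wa = WordAggregator._create("attention-with-rnn", hidden_size=300)
wa = WordAggregator._create(my_custom_module)  # resolved via _utils.get_module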
Example #3
    def _create(cls, arg, **kwargs):
        r"""Create a word comparator object.

        Args:
            arg (str or :mod:`deepmatcher.word_comparators` or callable):
                Same as the `word_comparator` argument to the constructor of
                :class:`AttrSummarizer`.
            **kwargs:
                Keyword arguments to the constructor of the WordComparator sub-class.
                For details on what these can be, please refer to the documentation of the
                sub-classes in :mod:`deepmatcher.word_comparators`.

        """
        if isinstance(arg, six.string_types):
            parts = arg.split("-")
            # Check len(parts) first so a single-token name falls through to
            # the ValueError below instead of raising an IndexError on parts[1].
            if (
                len(parts) == 2
                and parts[1] == "attention"
                and dm.modules.AlignmentNetwork.supports_style(parts[0])
            ):
                wc = dm.word_comparators.Attention(alignment_network=parts[0], **kwargs)
            else:
                raise ValueError("Unknown Word Comparator name.")
        else:
            wc = _utils.get_module(WordComparator, arg)

        if wc is not None:
            wc.expect_signature("[AxBxC, AxDxC, AxBxE, AxDxE] -> [AxBxF]")

        return wc
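
Unlike the aggregator factory, there is no assert here, so the comparator is optional. A hedged sketch, assuming "dot" is a supported AlignmentNetwork style:

# Hypothetical calls mirroring the branches above.
wc = WordComparator._create("dot-attention", hidden_size=300)
wc = WordComparator._create(None)  # assumed to pass None through, skipping the check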
Example #4
    def _create(cls, arg, **kwargs):
        r"""Create a word contextualizer object.

        Args:
            arg (str or :mod:`deepmatcher.word_contextualizers` or callable):
                Same as the `word_contextualizer` argument to the constructor of
                :class:`AttrSummarizer`.
            **kwargs:
                Keyword arguments to the constructor of the WordContextualizer sub-class.
                For details on what these can be, please refer to the documentation of the
                sub-classes in :mod:`deepmatcher.word_contextualizers`.

        """
        if isinstance(arg, six.string_types):
            if dm.word_contextualizers.RNN.supports_style(arg):
                wc = dm.word_contextualizers.RNN(arg, **kwargs)
            elif arg == "self-attention":
                wc = dm.word_contextualizers.SelfAttention(**kwargs)
            else:
                raise ValueError("Unknown Word Contextualizer name.")
        else:
            wc = _utils.get_module(WordContextualizer, arg)

        if wc is not None:
            wc.expect_signature("[AxBxC] -> [AxBxD]")

        return wc
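
A hedged sketch of the two string branches above; "gru" matches the default RNN style used in Example #7:

# Hypothetical calls mirroring the branches above.
wc = WordContextualizer._create("gru", hidden_size=300)  # an RNN style
wc = WordContextualizer._create("self-attention", hidden_size=300)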
Example #5
    def _create(cls, arg, **kwargs):
        r"""Create an attribute summarization object.

        Args:
            arg (str or :mod:`deepmatcher.attr_summarizers` or callable):
                Same as the `attr_summarizer` argument to the constructor of
                :class:`MatchingModel`.
            **kwargs:
                Keyword arguments to the constructor of the AttrSummarizer sub-class.
                For details on what these can be, please refer to the documentation of the
                sub-classes in :mod:`deepmatcher.attr_summarizers`.

        """
        assert arg is not None
        if isinstance(arg, six.string_types):
            type_map = {
                "sif": dm.attr_summarizers.SIF,
                "rnn": dm.attr_summarizers.RNN,
                "attention": dm.attr_summarizers.Attention,
                "hybrid": dm.attr_summarizers.Hybrid,
            }
            if arg in type_map:
                asr = type_map[arg](**kwargs)
            else:
                raise ValueError("Unknown Attribute Summarizer name.")
        else:
            asr = _utils.get_module(AttrSummarizer, arg)

        asr.expect_signature("[AxBxC, AxDxC] -> [AxE, AxE]")
        return asr
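
A hedged sketch of the built-in names in type_map; my_summarizer is a hypothetical custom module:

# Hypothetical calls mirroring the branches above.
asr = AttrSummarizer._create("hybrid", hidden_size=300)
# A custom module must satisfy the [AxBxC, AxDxC] -> [AxE, AxE] contract:
asr = AttrSummarizer._create(my_summarizer)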
Example #6
def _transform_module(op, hidden_size, output_size=None):
    output_size = output_size or hidden_size
    module = _utils.get_module(Transform,
                               op,
                               hidden_size=hidden_size,
                               output_size=output_size)
    if module:
        module.expect_signature("[AxB] -> [AxC]")
        module.expect_signature("[AxBxC] -> [AxBxD]")
    return module
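
A hedged usage sketch; "1-layer-highway" reuses the style string seen in Example #7:

# Hypothetical usage: output_size falls back to hidden_size when omitted.
transform = _transform_module("1-layer-highway", hidden_size=300)
# A None op is assumed to yield None (note the `if module:` guard above),
# making the transform optional for callers.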
Example #7
    def _init(
        self,
        hidden_size=None,
        input_dropout=0,
        rnn="gru",
        rnn_pool_style="birnn-last",
        score_dropout=0,
        input_context_comparison_network="1-layer-highway",
        value_transform_network=None,
        transform_dropout=0,
        input_size=None,
    ):

        assert rnn is not None
        self.rnn = _utils.get_module(dm.modules.RNN,
                                     rnn,
                                     hidden_size=hidden_size)
        self.rnn.expect_signature("[AxBxC] -> [AxBx{D}]".format(D=hidden_size))

        self.rnn_pool = ModulePool(rnn_pool_style)

        self.input_context_comparison_network = dm.modules._transform_module(
            input_context_comparison_network, hidden_size=hidden_size)
        self.scoring_network = dm.modules._transform_module("1-layer",
                                                            hidden_size=1)
        self.value_transform_network = dm.modules._transform_module(
            value_transform_network, hidden_size=hidden_size)

        self.input_dropout = nn.Dropout(input_dropout)
        self.transform_dropout = nn.Dropout(transform_dropout)
        self.score_dropout = nn.Dropout(score_dropout)

        self.softmax = nn.Softmax(dim=1)
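
A hedged construction sketch; the enclosing class is taken to be dm.word_aggregators.AttentionWithRNN (see Example #2), and since _init is the lazy initializer, arguments would normally pass through the public constructor:

# Hypothetical construction; all values are illustrative.
agg = dm.word_aggregators.AttentionWithRNN(
    hidden_size=300,
    rnn="lstm",                  # any style dm.modules.RNN supports
    rnn_pool_style="birnn-last",
    input_dropout=0.1,
)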
Example #8
def _bypass_module(op):
    module = _utils.get_module(Bypass, op)
    if module:
        module.expect_signature("[AxB, AxC] -> [AxB]")
    return module
Example #9
def _merge_module(op):
    module = _utils.get_module(Merge, op)
    if module:
        module.expect_signature("[AxB, AxB] -> [AxC]")
    return module
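
A hedged sketch contrasting the two helpers above; "residual" and "concat" are assumed style names:

# Hypothetical usage: both helpers return None for a falsy op, so the
# corresponding component is optional.
bypass = _bypass_module("residual")  # combines a module's input and output
merge = _merge_module("concat")      # fuses two same-sized representations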
Example #10
    def initialize(self, train_dataset, init_batch=None):
        """Initialize (not lazily) the matching model given the actual training data.

        Instantiates all sub-components and their trainable parameters.

        Args:
            train_dataset (:class:`~deepmatcher.data.MatchingDataset`):
                The training dataset obtained using :func:`deepmatcher.data.process`.
            init_batch (:class:`~deepmatcher.batch.MatchingBatch`):
                A batch of data to forward propagate through the model. If None, a batch
                is drawn from the training dataset.

        """
        if self._initialized:
            return

        # Copy training info over from the train set for persistent state, but
        # drop the actual data examples.
        self.meta = Bunch(**train_dataset.__dict__)
        if hasattr(self.meta, "fields"):
            del self.meta.fields
            del self.meta.examples

        self._register_train_buffer("state_meta", Bunch(**self.meta.__dict__))
        # We only need `self.meta.orig_metadata` for state.
        del self.state_meta.metadata

        self.attr_summarizers = dm.modules.ModuleMap()
        if isinstance(self.attr_summarizer, Mapping):
            for name, summarizer in self.attr_summarizer.items():
                self.attr_summarizers[name] = AttrSummarizer._create(
                    summarizer, hidden_size=self.hidden_size
                )
            # Per-attribute summarizers must exactly cover the canonical text
            # fields (empty symmetric difference).
            assert not (
                set(self.attr_summarizers.keys())
                ^ set(self.meta.canonical_text_fields)
            )
        else:
            self.attr_summarizer = AttrSummarizer._create(
                self.attr_summarizer, hidden_size=self.hidden_size
            )
            for name in self.meta.canonical_text_fields:
                self.attr_summarizers[name] = copy.deepcopy(self.attr_summarizer)

        if self.attr_condense_factor == "auto":
            self.attr_condense_factor = min(len(self.meta.canonical_text_fields), 6)
            if self.attr_condense_factor == 1:
                self.attr_condense_factor = None

        if not self.attr_condense_factor:
            self.attr_condensors = None
        else:
            self.attr_condensors = dm.modules.ModuleMap()
            for name in self.meta.canonical_text_fields:
                self.attr_condensors[name] = dm.modules.Transform(
                    "1-layer-highway",
                    non_linearity=None,
                    output_size=self.hidden_size // self.attr_condense_factor,
                )

        self.attr_comparators = dm.modules.ModuleMap()
        if isinstance(self.attr_comparator, Mapping):
            for name, comparator in self.attr_comparator.items():
                self.attr_comparators[name] = _create_attr_comparator(comparator)
            # Per-attribute comparators must exactly cover the canonical text
            # fields (empty symmetric difference).
            assert not (
                set(self.attr_comparators.keys())
                ^ set(self.meta.canonical_text_fields)
            )
        else:
            if isinstance(self.attr_summarizer, AttrSummarizer):
                self.attr_comparator = self._get_attr_comparator(
                    self.attr_comparator, self.attr_summarizer
                )
            else:
                if self.attr_comparator is None:
                    raise ValueError(
                        '"attr_comparator" must be specified if '
                        '"attr_summarizer" is custom.'
                    )

            self.attr_comparator = _create_attr_comparator(self.attr_comparator)
            for name in self.meta.canonical_text_fields:
                self.attr_comparators[name] = copy.deepcopy(self.attr_comparator)

        self.attr_merge = dm.modules._merge_module(self.attr_merge)
        self.classifier = _utils.get_module(
            Classifier, self.classifier, hidden_size=self.hidden_size
        )

        self._reset_embeddings(train_dataset.vocabs)

        # Instantiate all components using a small batch from training set.
        if not init_batch:
            run_iter = MatchingIterator(
                train_dataset,
                train_dataset,
                train=False,
                batch_size=4,
                device=-1,
                sort_in_buckets=False,
            )
            init_batch = next(iter(run_iter))
        self.forward(init_batch)

        # Keep this init_batch for future initializations.
        self.state_meta.init_batch = init_batch

        self._initialized = True
        logger.info(
            "Successfully initialized MatchingModel with {:d} trainable "
            "parameters.".format(tally_parameters(self))
        )
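
A hedged end-to-end sketch of when initialize runs; the file names are placeholders, and dm.data.process follows the docstring's pointer to deepmatcher.data.process:

# Hypothetical driver code based on the docstring above.
import deepmatcher as dm

train, validation, test = dm.data.process(
    path="sample_data",  # placeholder directory
    train="train.csv", validation="valid.csv", test="test.csv",
)
model = dm.MatchingModel(attr_summarizer="hybrid")
model.initialize(train)  # draws a small batch internally when init_batch is None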