Exemplo n.º 1
0
 def add_cmdline_args(
     cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
 ) -> ParlaiParser:
     """
     Register this agent's command-line options on ``parser``.

     The options of TransformerGeneratorAgent and TorchImageAgent are added
     first (both receive ``partial_opt``); afterwards an 'Image Encoder Args'
     group is created that holds ``--include-image-token`` and
     ``--image-fusion-type``.

     NOTE(review): the object returned is the newly created argument group,
     while the annotation advertises ParlaiParser -- confirm which one callers
     rely on before touching either.
     """
     # Parent agents register their options before the image-specific ones.
     for parent in (TransformerGeneratorAgent, TorchImageAgent):
         parent.add_cmdline_args(parser, partial_opt=partial_opt)

     image_args = parser.add_argument_group('Image Encoder Args')

     token_flag = dict(
         type='bool',
         default=True,
         recommended=True,
         help='if true, include image token (or no image token) for each example',
     )
     image_args.add_argument('--include-image-token', **token_flag)

     # The accepted values mirror the members of the FusionType enum.
     fusion_choices = [fusion.value for fusion in FusionType]
     image_args.add_argument(
         '--image-fusion-type',
         type=str,
         default='late',
         choices=fusion_choices,
         help='which fusion type to use',
     )
     return image_args
 def add_cmdline_args(cls, argparser):
     """
     Register the image-polyencoder command-line options on ``argparser``.

     PolyencoderAgent and TorchImageAgent add their own options first. An
     'ImagePolyencoder Args' group is then created with
     ``--image-combination-mode`` and ``--n-image-tokens``, and the default of
     ``reduction_type`` is set to None through that group. The group is
     returned.
     """
     for parent in (PolyencoderAgent, TorchImageAgent):
         parent.add_cmdline_args(argparser)

     poly_args = argparser.add_argument_group('ImagePolyencoder Args')

     # TODO: more thoroughly test out whether one of these choices is best and add a
     #  'recommended' arg here. 'add' and 'prepend' seem to be roughly similar in
     #  performance
     combination_modes = ['add', 'append', 'prepend']
     poly_args.add_argument(
         '--image-combination-mode',
         type=str,
         default='prepend',
         choices=combination_modes,
         help='How to combine image embedding (if used) with context embedding',
     )

     n_tokens_help = (
         'Number of tokens that the image encoding will consist of (when adding '
         'or prepending)'
     )
     poly_args.add_argument(
         '--n-image-tokens', type=int, default=1, help=n_tokens_help
     )

     # This agent doesn't support any encoder output reductions
     poly_args.set_defaults(reduction_type=None)
     return poly_args
Exemplo n.º 3
0
 def add_cmdline_args(cls, argparser):
     """
     Register this agent's command-line options on ``argparser``.

     TransformerGeneratorAgent and TorchImageAgent add their options first;
     an 'Image Encoder Args' group carrying ``--include-image-token`` is
     created afterwards. Nothing is returned.
     """
     for parent in (TransformerGeneratorAgent, TorchImageAgent):
         parent.add_cmdline_args(argparser)

     image_args = argparser.add_argument_group('Image Encoder Args')
     token_flag = dict(
         type='bool',
         default=True,
         recommended=True,
         help='if true, include image token (or no image token) for each example',
     )
     image_args.add_argument('--include-image-token', **token_flag)
Exemplo n.º 4
0
    def _process_image_features(self, features: torch.Tensor) -> torch.Tensor:
        """
        Format shape and type of a (possibly multi-dimensional) feature tensor.

        Overrides TorchImageAgent._process_image_features: the input is viewed
        as rows of length ``self.image_features_dim``, every row is passed
        through the TorchImageAgent implementation on its own, and the per-row
        results are stacked into a single tensor.
        """
        rows = features.view(-1, self.image_features_dim)

        processed = []
        # unbind(dim=0) walks the leading dimension one row at a time.
        for row in rows.unbind(dim=0):
            processed.append(TorchImageAgent._process_image_features(self, row))
        return torch.stack(processed)
Exemplo n.º 5
0
 def add_cmdline_args(cls, argparser):
     """
     Register this agent's command-line options on ``argparser``.

     TransformerGeneratorAgent and TorchImageAgent add their options first;
     an 'Image Encoder Args' group is created afterwards with
     ``--include-image-token`` and ``--image-fusion-type``. Nothing is
     returned.
     """
     for parent in (TransformerGeneratorAgent, TorchImageAgent):
         parent.add_cmdline_args(argparser)

     image_args = argparser.add_argument_group('Image Encoder Args')

     token_flag = dict(
         type='bool',
         default=True,
         recommended=True,
         help='if true, include image token (or no image token) for each example',
     )
     image_args.add_argument('--include-image-token', **token_flag)

     # The accepted values mirror the members of the FusionType enum.
     fusion_choices = [fusion.value for fusion in FusionType]
     image_args.add_argument(
         '--image-fusion-type',
         type=str,
         default='late',
         choices=fusion_choices,
         help='which fusion type to use',
     )
Exemplo n.º 6
0
 def add_cmdline_args(
     cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
 ) -> ParlaiParser:
     """
     Register the image-polyencoder command-line options on ``parser``.

     PolyencoderAgent and TorchImageAgent add their own options first (both
     receive ``partial_opt``). An 'ImagePolyencoder Args' group is then
     created with ``--image-combination-mode``, and the default of
     ``reduction_type`` is set to None through that group.

     NOTE(review): the object returned is the newly created argument group,
     while the annotation advertises ParlaiParser -- confirm which one callers
     rely on before touching either.
     """
     for parent in (PolyencoderAgent, TorchImageAgent):
         parent.add_cmdline_args(parser, partial_opt=partial_opt)

     poly_args = parser.add_argument_group('ImagePolyencoder Args')

     # TODO: more thoroughly test out whether one of these choices is best and add a
     #  'recommended' arg here. 'add' and 'prepend' seem to be roughly similar in
     #  performance
     combination_modes = ['add', 'append', 'prepend']
     poly_args.add_argument(
         '--image-combination-mode',
         type=str,
         default='prepend',
         choices=combination_modes,
         help='How to combine image embedding (if used) with context embedding',
     )

     # This agent doesn't support any encoder output reductions
     poly_args.set_defaults(reduction_type=None)
     return poly_args