def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Register the image-encoder command-line options for this agent.

    The options of TransformerGeneratorAgent and TorchImageAgent are registered
    first (in that order), then an 'Image Encoder Args' group is created holding
    ``--include-image-token`` and ``--image-fusion-type``.

    :param parser: parser that receives the new arguments.
    :param partial_opt: forwarded unchanged to the delegated add_cmdline_args calls.
    :return: the 'Image Encoder Args' argument group created here.
    """
    # Delegate first so the arguments of both underlying agents are present.
    for agent_class in (TransformerGeneratorAgent, TorchImageAgent):
        agent_class.add_cmdline_args(parser, partial_opt=partial_opt)

    image_args = parser.add_argument_group('Image Encoder Args')

    token_help = 'if true, include image token (or no image token) for each example'
    image_args.add_argument(
        '--include-image-token',
        type='bool',
        default=True,
        recommended=True,
        help=token_help,
    )

    # Valid values are exactly the values of the FusionType enumeration.
    fusion_choices = [fusion.value for fusion in FusionType]
    image_args.add_argument(
        '--image-fusion-type',
        type=str,
        default='late',
        choices=fusion_choices,
        help='which fusion type to use',
    )
    return image_args
def add_cmdline_args(cls, argparser):
    """
    Register the command-line options specific to this agent.

    The options of PolyencoderAgent and TorchImageAgent are registered first (in
    that order), then an 'ImagePolyencoder Args' group is created holding
    ``--image-combination-mode`` and ``--n-image-tokens``. The group also sets the
    default of ``reduction_type`` to None.

    :param argparser: parser that receives the new arguments.
    :return: the 'ImagePolyencoder Args' argument group created here.
    """
    for agent_class in (PolyencoderAgent, TorchImageAgent):
        agent_class.add_cmdline_args(argparser)

    poly_args = argparser.add_argument_group('ImagePolyencoder Args')

    combination_modes = ['add', 'append', 'prepend']
    poly_args.add_argument(
        '--image-combination-mode',
        type=str,
        default='prepend',
        choices=combination_modes,
        help='How to combine image embedding (if used) with context embedding',
    )
    # TODO: more thoroughly test out whether one of these choices is best and add a
    # 'recommended' arg here. 'add' and 'prepend' seem to be roughly similar in
    # performance

    n_tokens_help = (
        'Number of tokens that the image encoding will consist of (when adding '
        'or prepending)'
    )
    poly_args.add_argument(
        '--n-image-tokens',
        type=int,
        default=1,
        help=n_tokens_help,
    )

    # This agent doesn't support any encoder output reductions
    poly_args.set_defaults(reduction_type=None)
    return poly_args
def add_cmdline_args(cls, argparser):
    """
    Register the image-token command-line option for this agent.

    The options of TransformerGeneratorAgent and TorchImageAgent are registered
    first (in that order), then an 'Image Encoder Args' group is created holding
    ``--include-image-token``. Nothing is returned.

    :param argparser: parser that receives the new arguments.
    """
    # Delegate first so the arguments of both underlying agents are present.
    for agent_class in (TransformerGeneratorAgent, TorchImageAgent):
        agent_class.add_cmdline_args(argparser)

    image_args = argparser.add_argument_group('Image Encoder Args')

    token_help = 'if true, include image token (or no image token) for each example'
    image_args.add_argument(
        '--include-image-token',
        type='bool',
        default=True,
        recommended=True,
        help=token_help,
    )
def _process_image_features(self, features: torch.Tensor) -> torch.Tensor:
    """
    Format the shape and type of an input image-feature tensor.

    Overrides TorchImageAgent._process_image_features to handle multi-dimensional
    images: the input is viewed as rows of ``self.image_features_dim`` values, each
    row is passed through TorchImageAgent._process_image_features individually, and
    the per-row results are stacked into a single tensor.

    :param features: image-feature tensor whose element count is a multiple of
        ``self.image_features_dim`` (required by the ``view`` call).
    :return: the stacked, individually processed rows.
    """
    # One row per image encoding.
    flat = features.view(-1, self.image_features_dim)
    process_one = TorchImageAgent._process_image_features
    per_image = [process_one(self, flat[idx]) for idx in range(flat.size(0))]
    return torch.stack(per_image)
def add_cmdline_args(cls, argparser):
    """
    Register the image-encoder command-line options for this agent.

    The options of TransformerGeneratorAgent and TorchImageAgent are registered
    first (in that order), then an 'Image Encoder Args' group is created holding
    ``--include-image-token`` and ``--image-fusion-type``. Nothing is returned.

    :param argparser: parser that receives the new arguments.
    """
    # Delegate first so the arguments of both underlying agents are present.
    for agent_class in (TransformerGeneratorAgent, TorchImageAgent):
        agent_class.add_cmdline_args(argparser)

    image_args = argparser.add_argument_group('Image Encoder Args')

    token_help = 'if true, include image token (or no image token) for each example'
    image_args.add_argument(
        '--include-image-token',
        type='bool',
        default=True,
        recommended=True,
        help=token_help,
    )

    # Valid values are exactly the values of the FusionType enumeration.
    fusion_choices = [fusion.value for fusion in FusionType]
    image_args.add_argument(
        '--image-fusion-type',
        type=str,
        default='late',
        choices=fusion_choices,
        help='which fusion type to use',
    )
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Register the command-line options specific to this agent.

    The options of PolyencoderAgent and TorchImageAgent are registered first (in
    that order), then an 'ImagePolyencoder Args' group is created holding
    ``--image-combination-mode``. The group also sets the default of
    ``reduction_type`` to None.

    :param parser: parser that receives the new arguments.
    :param partial_opt: forwarded unchanged to the delegated add_cmdline_args calls.
    :return: the 'ImagePolyencoder Args' argument group created here.
    """
    for agent_class in (PolyencoderAgent, TorchImageAgent):
        agent_class.add_cmdline_args(parser, partial_opt=partial_opt)

    poly_args = parser.add_argument_group('ImagePolyencoder Args')

    combination_modes = ['add', 'append', 'prepend']
    poly_args.add_argument(
        '--image-combination-mode',
        type=str,
        default='prepend',
        choices=combination_modes,
        help='How to combine image embedding (if used) with context embedding',
    )
    # TODO: more thoroughly test out whether one of these choices is best and add a
    # 'recommended' arg here. 'add' and 'prepend' seem to be roughly similar in
    # performance

    # This agent doesn't support any encoder output reductions
    poly_args.set_defaults(reduction_type=None)
    return poly_args