Пример #1
0
 def add_model_subargs(self, model: str, partial: Opt):
     """
     Register the command-line arguments declared by a single model.

     The agent class for ``model`` is resolved through ``load_agent_module``.
     When that class exposes ``add_cmdline_args`` the hook is called with this
     parser and ``partial``; when it exposes ``dictionary_class`` the string
     form of that class is installed as the default for ``dict_class``.

     :param model:
         model identifier handed to ``load_agent_module``.
     :param partial:
         options forwarded as the second argument of ``add_cmdline_args``.

     :raises TypeError:
         if ``add_cmdline_args`` raises ``TypeError``; the new message points
         at the outdated one-argument signature and chains the original error.
     """
     agent_cls = load_agent_module(model)

     # Step 1: let the agent contribute its own arguments.
     try:
         if hasattr(agent_cls, 'add_cmdline_args'):
             agent_cls.add_cmdline_args(self, partial)
     except TypeError as signature_error:
         message = (
             f"Agent '{model}' appears to have signature "
             "add_cmdline_args(argparser) but we have updated the signature "
             "to add_cmdline_args(argparser, partial_opt). For details, see "
             "https://github.com/facebookresearch/ParlAI/pull/3328."
         )
         raise TypeError(message) from signature_error
     except argparse.ArgumentError:
         # argparse rejected the registration: treated as "already added"
         pass

     # Step 2: default ``dict_class`` to the agent's dictionary class.
     try:
         if hasattr(agent_cls, 'dictionary_class'):
             self.set_defaults(
                 dict_class=class2str(agent_cls.dictionary_class())
             )
     except argparse.ArgumentError:
         # treated as "already added"
         pass
Пример #2
0
def create_agent(opt: Opt, requireModelExists=False):
    """
    Instantiate an agent described by ``model``, ``model_params`` and ``model_file``.

    ``model`` may be a full specification such as
    ``parlai.agents.ir_baseline.agents:IrBaselineAgent`` (module path, then
    class name) or a short name such as ``ir_baseline``, which implies the
    path above and a class name ending in 'Agent'.

    When ``model_file`` is set, the function first tries to build the agent
    from the options stored alongside it (``opt['model_file'] + '.opt'``, a
    pickled or json dict of the model's options). In that case none of the
    other options, not even the model name, have to be given explicitly.

    :param opt:
        options; entries missing from it may be filled in with parser defaults
        when ``datapath`` is absent, and ``model_file`` is rewritten in place.
    :param requireModelExists:
        if True, a ``model_file`` that is not an existing file raises, and a
        loadable model created without ``model_file`` logs a warning.

    :raises RuntimeError:
        if the required model file is missing, or if no ``model`` is set and
        no agent could be created from the opt file.
    """
    if opt.get('datapath') is None:
        # No datapath yet: build a throwaway parser just to obtain defaults.
        from parlai.core.params import ParlaiParser, get_model_name

        default_parser = ParlaiParser(add_parlai_args=False)
        default_parser.add_parlai_data_path()
        # Pull in the model's own arguments as well, when a model is known.
        model_name = get_model_name(opt)
        if model_name is not None:
            default_parser.add_model_subargs(model_name)
        defaults = default_parser.parse_args("", print_args=False)
        # Only fill the gaps; values already in opt win.
        for key, value in defaults.items():
            if key not in opt:
                opt[key] = value

    if opt.get('model_file'):
        opt['model_file'] = modelzoo_path(opt.get('datapath'),
                                          opt['model_file'])
        file_missing = not os.path.isfile(opt['model_file']) \
            if requireModelExists else False
        if file_missing:
            raise RuntimeError(
                'WARNING: Model file does not exist, check to make '
                'sure it is correct: {}'.format(opt['model_file']))
        # Prefer the opt file next to the model: it makes the model name
        # itself optional.
        agent = create_agent_from_opt_file(opt)
        if agent is not None:
            return agent
        logging.info(
            f"No model with opt yet at: {opt['model_file']}(.opt)")

    if not opt.get('model'):
        raise RuntimeError('Need to set `model` argument to use create_agent.')

    agent_class = load_agent_module(opt['model'])
    # When weights come from --init-model, compare opts with the loaded ones.
    compare_init_model_opts(opt, opt)
    agent = agent_class(opt)
    if requireModelExists and hasattr(
            agent, 'load') and not opt.get('model_file'):
        # A loadable model without model_file is probably a mistake.
        logging.warn('model_file unset but model has a `load` function.')
    return agent
Пример #3
0
def create_agent_from_opt_file(opt: Opt):
    """
    Load agent options and module from file if opt file exists.

    Checks to see if file exists opt['model_file'] + ".opt"; if so, load up the
    options from the file and use that to create an agent, loading the model
    type from that file and overriding any options specified in that file when
    instantiating the agent.

    If that file does not exist, return None.

    :param opt:
        options; must contain ``model_file``. Entries of ``opt['override']``
        (if present) replace the saved values, and any key of ``opt`` missing
        from the saved options is copied over.

    :return:
        the agent built from the merged options, or None when the opt file
        does not exist.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'
    if not os.path.isfile(optfile):
        return None

    new_opt = Opt.load(optfile)
    # TODO we need a better way to say these options are never copied...
    if 'datapath' in new_opt:
        # never use the datapath from an opt dump
        del new_opt['datapath']
    if 'batchindex' in new_opt:
        # This saved variable can cause trouble if we switch to BS=1 at test time
        del new_opt['batchindex']
    # only override opts specified in 'override' dict
    if opt.get('override'):
        for k, v in opt['override'].items():
            if k in new_opt and str(v) != str(new_opt.get(k)):
                logging.warn(
                    f"overriding opt['{k}'] to {v} (previously: {new_opt.get(k)})"
                )
            new_opt[k] = v

    model_class = load_agent_module(new_opt['model'])

    if hasattr(model_class, 'upgrade_opt'):
        new_opt = model_class.upgrade_opt(new_opt)

    # add model arguments to new_opt if they aren't in new_opt already
    for k, v in opt.items():
        if k not in new_opt:
            new_opt[k] = v
    new_opt['model_file'] = model_file

    # Resolve the dict file, falling back to `<model_file>.dict`.
    # old_dict_file must be bound on every path: the warning below formats it
    # even when no dict file was configured at all (previously that case
    # raised UnboundLocalError instead of warning).
    old_dict_file = None
    if not new_opt.get('dict_file'):
        new_opt['dict_file'] = model_file + '.dict'
    elif not os.path.isfile(new_opt['dict_file']):
        old_dict_file = new_opt['dict_file']
        new_opt['dict_file'] = model_file + '.dict'
    if not os.path.isfile(new_opt['dict_file']):
        warn_once(
            'WARNING: Neither the specified dict file ({}) nor the '
            '`model_file`.dict file ({}) exists, check to make sure either '
            'is correct. This may manifest as a shape mismatch later '
            'on.'.format(old_dict_file, new_opt['dict_file']))

    # if we want to load weights from --init-model, compare opts with
    # loaded ones
    compare_init_model_opts(opt, new_opt)
    return model_class(new_opt)
Пример #4
0
 def add_model_subargs(self, model):
     """
     Register the command-line arguments declared by a single model.

     The agent class for ``model`` is resolved through ``load_agent_module``.
     When it exposes ``add_cmdline_args`` the hook is called with this parser;
     when it exposes ``dictionary_class`` the string form of that class is
     installed as the default for ``dict_class``.

     :param model:
         model identifier handed to ``load_agent_module``.
     """
     agent_cls = load_agent_module(model)

     # Step 1: let the agent contribute its own arguments.
     try:
         if hasattr(agent_cls, 'add_cmdline_args'):
             agent_cls.add_cmdline_args(self)
     except argparse.ArgumentError:
         # argparse rejected the registration: treated as "already added"
         pass

     # Step 2: default ``dict_class`` to the agent's dictionary class.
     try:
         if hasattr(agent_cls, 'dictionary_class'):
             self.set_defaults(
                 dict_class=class2str(agent_cls.dictionary_class())
             )
     except argparse.ArgumentError:
         # treated as "already added"
         pass
Пример #5
0
    def _init_shared_model(self, opt_key: str):
        """
        Build a sub agent on top of the shared state of the knowledge agent.

        The options stored under ``opt_key`` are first updated in place with
        their own ``override`` entries. The new agent is constructed from
        those options together with ``self.knowledge_agent.share()``, whose
        ``'opt'`` entry is replaced by the same options.

        :param opt_key:
            key into ``self.opts`` selecting which sub agent to create.

        :return:
            the agent instance; its class is the one named by the ``model``
            override when present, otherwise the class of
            ``self.knowledge_agent``.
        """
        sub_opt = self.opts[opt_key]
        sub_opt.update(sub_opt['override'])
        # Re-read after the update so the lookup sees the current mapping.
        overrides = sub_opt['override']
        agent_class = (
            load_agent_module(overrides['model'])
            if 'model' in overrides
            else type(self.knowledge_agent)
        )
        shared_state = self.knowledge_agent.share()
        shared_state['opt'] = sub_opt
        return agent_class(sub_opt, shared_state)
Пример #6
0
def create_agent_from_opt_file(opt: Opt):
    """
    Build an agent from the ``.opt`` file stored next to the model file.

    The file looked up is ``opt['model_file'] + '.opt'``. When it exists, the
    saved options are loaded, stripped of every key listed in ``NOCOPY_ARGS``,
    patched with ``opt['override']``, and completed with any key of ``opt``
    they do not contain yet. The model type is taken from the saved options.

    :param opt:
        options; must contain ``model_file``.

    :return:
        the instantiated agent, or None when the opt file does not exist.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'

    if not PathManager.exists(optfile):
        return None

    saved_opt = Opt.load(optfile)

    # Drop every saved argument that must not be carried over.
    for blocked in NOCOPY_ARGS:
        if blocked in saved_opt:
            del saved_opt[blocked]

    # Explicit overrides always win; log only when a saved value changes.
    overrides = opt.get('override')
    if overrides:
        for key, value in overrides.items():
            previous = saved_opt.get(key)
            if key in saved_opt and str(value) != str(previous):
                logging.warning(
                    f'Overriding opt["{key}"] to {value} (previously: {previous})'
                )
            saved_opt[key] = value

    agent_class = load_agent_module(saved_opt['model'])

    if hasattr(agent_class, 'upgrade_opt'):
        saved_opt = agent_class.upgrade_opt(saved_opt)

    # Fill in whatever the saved options do not define.
    for key, value in opt.items():
        if key not in saved_opt:
            saved_opt[key] = value

    # The model file path always comes from the caller's opt.
    saved_opt['model_file'] = model_file
    # So does init_model when the caller set it.
    # NOTE: needed e.g. when the Train Loop sets 'init_model' while loading
    # from a checkpoint.
    init_model = opt.get('init_model')
    if init_model is not None:
        saved_opt['init_model'] = init_model

    # Resolve the dict file, falling back to `<model_file>.dict`.
    fallback_dict = model_file + '.dict'
    if not saved_opt.get('dict_file'):
        old_dict_file = None
        saved_opt['dict_file'] = fallback_dict
    elif not PathManager.exists(saved_opt['dict_file']):
        old_dict_file = saved_opt['dict_file']
        saved_opt['dict_file'] = fallback_dict
    if not PathManager.exists(saved_opt['dict_file']):
        warn_once(
            'WARNING: Neither the specified dict file ({}) nor the '
            '`model_file`.dict file ({}) exists, check to make sure either '
            'is correct. This may manifest as a shape mismatch later '
            'on.'.format(old_dict_file, saved_opt['dict_file']))

    # When weights come from --init-model, compare opts with the loaded ones.
    compare_init_model_opts(opt, saved_opt)
    return agent_class(saved_opt)
Пример #7
0
 def test_load_internal_agent(self):
     # The 'internal:' prefix must resolve to something truthy.
     loaded = load_agent_module('internal:parrot')
     assert loaded, 'Could not load internal agent'
Пример #8
0
 def test_load_agent(self):
     # The agent named in OPTIONS must resolve to RepeatLabelAgent.
     loaded = load_agent_module(OPTIONS['agent'])
     self.assertEqual(loaded, RepeatLabelAgent)
Пример #9
0
def create_agent_from_opt_file(opt: Opt):
    """
    Load agent options and module from file if opt file exists.

    Checks to see if file exists opt['model_file'] + ".opt"; if so, load up the
    options from the file and use that to create an agent, loading the model
    type from that file and overriding any options specified in that file when
    instantiating the agent.

    If that file does not exist, return None.

    :param opt:
        options; must contain ``model_file``. Entries of ``opt['override']``
        (if present) replace the saved values, and any key of ``opt`` missing
        from the saved options is copied over.

    :return:
        the agent built from the merged options, or None when the opt file
        does not exist.

    :raises RuntimeError:
        if the model class defines ``model_version`` and the saved
        ``model_version`` (0 when absent) differs from it.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'
    if not os.path.isfile(optfile):
        return None

    new_opt = Opt.load(optfile)
    # TODO we need a better way to say these options are never copied...
    if 'datapath' in new_opt:
        # never use the datapath from an opt dump
        del new_opt['datapath']
    if 'batchindex' in new_opt:
        # This saved variable can cause trouble if we switch to BS=1 at test time
        del new_opt['batchindex']
    # only override opts specified in 'override' dict
    if opt.get('override'):
        for k, v in opt['override'].items():
            if str(v) != str(new_opt.get(k, None)):
                print(
                    "[ warning: overriding opt['{}'] to {} ("
                    "previously: {} )]".format(k, v, new_opt.get(k, None))
                )
            new_opt[k] = v

    model_class = load_agent_module(new_opt['model'])

    # check for model version
    if hasattr(model_class, 'model_version'):
        curr_version = new_opt.get('model_version', 0)
        if curr_version != model_class.model_version():
            model = new_opt['model']
            m = (
                'It looks like you are trying to load an older version of'
                ' the selected model. Change your model argument to use '
                'the old version from parlai/agents/legacy_agents: for '
                'example: `-m legacy:{m}:{v}` or '
                '`--model parlai.agents.legacy_agents.{m}.{m}_v{v}:{c}`'
            )
            if '.' not in model:
                # give specific error message if it's easy
                raise RuntimeError(
                    m.format(m=model, v=curr_version, c=model_class.__name__)
                )
            else:
                # otherwise generic one
                raise RuntimeError(
                    m.format(m='modelname', v=curr_version, c='ModelAgent')
                )

    if hasattr(model_class, 'upgrade_opt'):
        new_opt = model_class.upgrade_opt(new_opt)

    # add model arguments to new_opt if they aren't in new_opt already
    for k, v in opt.items():
        if k not in new_opt:
            new_opt[k] = v
    new_opt['model_file'] = model_file

    # Resolve the dict file, falling back to `<model_file>.dict`.
    # old_dict_file must be bound on every path: the warning below formats it
    # even when no dict file was configured at all (previously that case
    # raised UnboundLocalError instead of warning).
    old_dict_file = None
    if not new_opt.get('dict_file'):
        new_opt['dict_file'] = model_file + '.dict'
    elif not os.path.isfile(new_opt['dict_file']):
        old_dict_file = new_opt['dict_file']
        new_opt['dict_file'] = model_file + '.dict'
    if not os.path.isfile(new_opt['dict_file']):
        warn_once(
            'WARNING: Neither the specified dict file ({}) nor the '
            '`model_file`.dict file ({}) exists, check to make sure either '
            'is correct. This may manifest as a shape mismatch later '
            'on.'.format(old_dict_file, new_opt['dict_file'])
        )

    # if we want to load weights from --init-model, compare opts with
    # loaded ones
    compare_init_model_opts(opt, new_opt)
    return model_class(new_opt)
Пример #10
0
def get_dataset_classes(opt):
    """
    Get datasets from the options.

    To use a custom dataset (as opposed to the StreamDataset or ParlAIDataset),
    you can subclass the pytorch Dataset class and specify its location on the
    command line.

    For example, the VQA v1 task provides a custom dataset, which can
    be specified on the command line as follows: ``-pytd vqa_v1:VQADataset``

    Note that if the dataset is named ``DefaultDataset``, then you do
    not need to specify its name following the colon; e.g., it
    would just be: ``-pytd vqa_v1``. An empty name after the colon
    (``-pytd vqa_v1:``) is treated the same way.

    :param opt:
        options; reads ``datatype``, ``pytorch_teacher_dataset``,
        ``pytorch_teacher_task`` and ``model``.

    :return:
        a list of ``(dataset_class, collate_fn, task_name)`` tuples: first one
        per comma-separated entry of ``pytorch_teacher_task`` (using the
        default dataset and collate), then one per comma-separated entry of
        ``pytorch_teacher_dataset``.
    """
    if 'stream' in opt.get('datatype'):
        default_dataset = StreamDataset
    else:
        default_dataset = ParlAIDataset
    dataset_name = opt.get('pytorch_teacher_dataset')
    task_name = opt.get('pytorch_teacher_task')
    datasets = []
    if task_name is not None:
        datasets += [(default_dataset, default_collate, task)
                     for task in task_name.split(',')]
    if not dataset_name:
        return datasets

    internal_prefix = 'internal:'
    for raw_spec in dataset_name.split(','):
        full_task_name = raw_spec.strip()
        spec = full_task_name
        repo = 'parlai'
        if spec.startswith(internal_prefix):
            # To switch to local repo, useful for non-public projects
            # (make a directory called 'parlai_internal' with your private agents)
            repo = 'parlai_internal'
            spec = spec[len(internal_prefix):]

        parts = spec.split(':')
        module_part = parts[0]
        # A dotted first part is taken as a complete module path.
        is_full_path = '.' in module_part
        if is_full_path:
            module_name = module_part
        else:
            module_name = '{}.tasks.{}.agents'.format(repo, module_part.lower())

        class_part = parts[1] if len(parts) > 1 else ''
        if class_part:
            dataset = class_part[0].upper() + class_part[1:]
            if not is_full_path and 'Dataset' not in dataset:
                # Reformat from underscore to CamelCase and append "Dataset" to
                # class name by default if a complete path is not given.
                # Slicing (w[:1]) tolerates empty segments produced by doubled
                # or trailing underscores, which used to raise IndexError.
                camel = ''.join(
                    w[:1].upper() + w[1:] for w in dataset.split('_')
                )
                dataset = camel + 'Dataset'
        else:
            # No class name given (or nothing after the colon).
            dataset = 'DefaultDataset'

        my_module = importlib.import_module(module_name)
        dataset_class = getattr(my_module, dataset)

        # Collate preference: the dataset's own, then the model's, then default.
        collate = default_collate
        if hasattr(dataset_class, 'collate'):
            collate = dataset_class.collate
        elif opt.get('model', False):
            agent_class = load_agent_module(opt.get('model'))
            if hasattr(agent_class, 'collate'):
                collate = agent_class.collate
        datasets.append((dataset_class, collate, full_task_name))
    return datasets