Example #1
def eval(args):
    lpreds, ipreds = load(args.label_path), load(args.intent_path)
    lgolds, igolds = (load(args.gold_label_path),
                      load(args.gold_intent_path))
    res = evaluate(lgolds, igolds, lpreds, ipreds, detailed=args.detailed)
    # Pick the serializer that matches the requested output format.
    dump = utils.map_val(args.format, {
        "yaml": utils.dump_yaml,
        "json": json.dump,
    }, "output format")
    dump(res, sys.stdout)
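
Every example on this page dispatches on a string key through utils.map_val.
The helper itself is not shown here; the following is a minimal sketch
reconstructed from the call sites above and in Example #5 (which passes
ignore_err and fallback). The project's actual utils.map_val may differ in
its details.

def map_val(key, mapping, name="value", ignore_err=False, fallback=None):
    # Look up `key` in `mapping`; `name` only labels the error message.
    if key in mapping:
        return mapping[key]
    if ignore_err:
        # Example #5 relies on this branch: unknown keys yield `fallback`.
        return fallback
    raise ValueError(f"invalid {name}: {key}; must be one of {list(mapping)}")
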
Example #2
def get_optimizer_cls(args):
    kwargs = dict()
    if args.learning_rate is not None:
        kwargs["lr"] = args.learning_rate
    # Each value is a lambda, so only the selected optimizer class is
    # instantiated, and only when the returned factory is finally called.
    return utils.map_val(
        args.optimizer, {
            "adam": lambda p: op.Adam(p, **kwargs),
            "adamax": lambda p: op.Adamax(p, **kwargs),
            "adagrad": lambda p: op.Adagrad(p, **kwargs),
            "adadelta": lambda p: op.Adadelta(p, **kwargs)
        }, "optimizer")
Example #3
def get_embeddings(args):
    # The selected value is a zero-argument factory; the trailing ()
    # constructs the embeddings.
    return utils.map_val(
        args.embed_type,
        {
            "glove-format": lambda: GloveFormatEmbeddings(
                path=args.embed_path,
                # words=set(vocab.f2i) if vocab is not None else None
            ),
            "tar-format": lambda: TarFormatEmbeddings(path=args.embed_path)
        },
        "embedding type")()
Example #4
def get_embeddings(args, vocab=None):
    return utils.map_val(
        args.word_embed_type, {
            "glove-format": lambda: embedding.glove.GloveFormatEmbeddings(
                path=args.word_embed_path,
                words=set(vocab.f2i) if vocab is not None else None),
            "tar-format": lambda: embedding.tarformat.TarFormatEmbeddings(
                path=args.word_embed_path),
            "fasttext": lambda: embedding.fasttext.FastTextEmbeddings(
                fasttext_path=args.fasttext_path,
                model_path=args.word_embed_path)
        }, "embedding type")()
Example #5
    def get_module_cls(self, key, kwargs_map=None, fallback=None):
        if fallback is None:
            fallback = {}
        if kwargs_map is None:
            kwargs_map = {}
        module_type = self.get(key)  # renamed to avoid shadowing built-in `type`
        cls = manager.get(module_type)
        # With ignore_err=True, map_val returns `fallback` instead of
        # raising when module_type has no entry in kwargs_map.
        sub_kwargs = utils.map_val(module_type,
                                   kwargs_map,
                                   ignore_err=True,
                                   fallback=fallback)

        def create(*args, **kwargs):
            return cls(*args, **kwargs, **sub_kwargs)

        return create
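
The method above returns a closure that merges per-type keyword arguments
into the construction call. A hypothetical use, assuming `cfg` is an
instance of the surrounding class and cfg.get("rnn") resolves to "lstm":

factory = cfg.get_module_cls("rnn", kwargs_map={"lstm": {"num_layers": 2}})
module = factory(input_size=300, hidden_size=512)
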
Example #6
def save_template(args):
    pkg = utils.import_module(f"model.{args.package}")
    if args.module_name is not None:
        clsmap = manager.get_module_namemap(pkg)
        cls = clsmap.get(args.module_name)
    else:
        cls = manager.get_module_classes(pkg)[0]
    template = {
        "type": cls.name,
        "vargs": model.get_optarg_template(cls)
    }
    # Choose the serializer for the requested template format.
    dump = utils.map_val(args.format, {
        "yaml": utils.dump_yaml,
        "json": json.dump
    }, "template format")
    with open(args.save_path, "w") as f:
        dump(template, f)
Example #7
def get_embeddings(args, vocab):
    # Only one supported type here, and the value is built eagerly, so
    # map_val returns the embeddings object itself (no trailing call).
    return utils.map_val(
        args.embed_type, {
            "glove-format": GloveFormatEmbeddings(path=args.embed_path,
                                                  words=set(vocab.f2i))
        }, "embedding type")
Example #8
def generate(args):
    devices = utils.get_devices(args.gpu)
    if args.seed is not None:
        utils.manual_seed(args.seed)

    logging.info("Loading data...")
    vocab_paths = [args.word_vocab, args.label_vocab, args.intent_vocab]
    vocabs = [utils.load_pkl(v) for v in vocab_paths]
    dataloader = None

    logging.info("Initializing generation environment...")
    model, vocabs[0] = prepare_model(args, vocabs)
    model = utils.to_device(model, devices)
    encoder = encode.Encoder(model=model,
                             device=devices[0],
                             batch_size=args.batch_size)
    generator = Generator(model=model,
                          device=devices[0],
                          batch_size=args.batch_size,
                          sent_vocab=vocabs[0],
                          label_vocab=vocabs[1],
                          intent_vocab=vocabs[2],
                          bos=args.bos,
                          eos=args.eos,
                          unk=args.unk,
                          max_len=args.max_length,
                          beam_size=args.beam_size,
                          beam_topk=args.beam_sample_topk,
                          validate=args.validate)

    logging.info("Commencing generation...")
    if args.generation_type in {"posterior", "uniform"}:
        if dataloader is None:
            dataloader = create_dataloader(args, vocabs)
    # Build the sampler lazily: "posterior" and "uniform" need encoded
    # training data, while "gaussian" samples from the prior directly.
    sampler = utils.map_val(
        args.generation_type, {
            "gaussian": lambda: None,
            "posterior": lambda: MultivariateGaussianMixtureSampler(
                *encoder.encode(dataloader),
                scale=args.posterior_sampling_scale),
            "uniform": lambda: UniformNoiseSampler(
                encoder.encode(dataloader)[0],
                pa=args.uniform_sampling_pa,
                pm=args.uniform_sampling_pm)
        }, name="sampler")()
    with torch.no_grad():
        gens, probs = generator.generate(args.samples, sampler)
    if args.nearest_neighbors is not None:
        if dataloader is None:
            dataloader = create_dataloader(args, vocabs)
        sents = [data["string"][0] for data in dataloader.dataset]
        searcher = neighbor.PyTorchPCASearcher(
            pca_dim=100,
            sents=sents,
            num_neighbors=args.nearest_neighbors,
            batch_size=args.nearest_neighbors_batch_size,
            device=devices[0])
        neighbors = searcher.search(gens[0])
    else:
        neighbors = None
    report_stats(args, gens[0], neighbors)
    save(args, gens, probs, neighbors)

    logging.info("Done!")