def eval(args):
    # Evaluate label/intent predictions against the gold annotations and
    # dump the resulting metrics to stdout in the requested format.
    lpreds, ipreds = load(args.label_path), load(args.intent_path)
    lgolds, igolds = (load(args.gold_label_path),
                      load(args.gold_intent_path))
    res = evaluate(lgolds, igolds, lpreds, ipreds, detailed=args.detailed)
    dump = utils.map_val(args.format, {
        "yaml": utils.dump_yaml,
        "json": json.dump,
    }, "output format")
    dump(res, sys.stdout)
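# `utils.map_val` is the keyed-dispatch helper used throughout this file.
# Its real implementation is not shown here; the sketch below is an
# assumption reconstructed from the call sites (a positional `name` used in
# error messages, plus `ignore_err`/`fallback` keywords in one caller).
def map_val(key, mapping, name="value", ignore_err=False, fallback=None):
    # Return the entry registered under `key`; on a miss, either return
    # `fallback` (when ignore_err=True) or raise an error naming what kind
    # of value failed to resolve.
    if key in mapping:
        return mapping[key]
    if ignore_err:
        return fallback
    raise ValueError(
        f"unrecognized {name}: {key!r} (expected one of {sorted(mapping)})")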
def get_optimizer_cls(args):
    # Despite the name, this returns a factory over a parameter iterable
    # rather than an optimizer class, so that the learning rate (when
    # given) is already bound.
    kwargs = dict()
    if args.learning_rate is not None:
        kwargs["lr"] = args.learning_rate
    return utils.map_val(
        args.optimizer, {
            "adam": lambda p: op.Adam(p, **kwargs),
            "adamax": lambda p: op.Adamax(p, **kwargs),
            "adagrad": lambda p: op.Adagrad(p, **kwargs),
            "adadelta": lambda p: op.Adadelta(p, **kwargs)
        }, "optimizer")
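# Usage sketch (`args` and `model` are placeholders, not defined here):
#
#     optim_fn = get_optimizer_cls(args)
#     optimizer = optim_fn(model.parameters())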
def get_embeddings(args):
    return utils.map_val(
        args.embed_type, {
            "glove-format": lambda: GloveFormatEmbeddings(
                path=args.embed_path,
                # words=set(vocab.f2i) if vocab is not None else None
            ),
            "tar-format": lambda: TarFormatEmbeddings(path=args.embed_path)
        }, "embedding type")()
def get_embeddings(args, vocab=None):
    return utils.map_val(
        args.word_embed_type, {
            "glove-format": lambda: embedding.glove.GloveFormatEmbeddings(
                path=args.word_embed_path,
                words=set(vocab.f2i) if vocab is not None else None),
            "tar-format": lambda: embedding.tarformat.TarFormatEmbeddings(
                path=args.word_embed_path),
            "fasttext": lambda: embedding.fasttext.FastTextEmbeddings(
                fasttext_path=args.fasttext_path,
                model_path=args.word_embed_path)
        }, "embedding type")()
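# Usage sketch (hypothetical `args` fields): the backends are wrapped in
# lambdas so that only the selected embedding source is actually loaded.
#
#     vocab = utils.load_pkl(args.word_vocab)
#     embeds = get_embeddings(args, vocab)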
def get_module_cls(self, key, kwargs_map=None, fallback=None):
    if fallback is None:
        fallback = {}
    if kwargs_map is None:
        kwargs_map = {}
    # Renamed from `type` to avoid shadowing the builtin.
    module_type = self.get(key)
    cls = manager.get(module_type)
    sub_kwargs = utils.map_val(module_type, kwargs_map,
                               ignore_err=True, fallback=fallback)

    def create(*args, **kwargs):
        # Bind the per-type keyword arguments resolved above.
        return cls(*args, **kwargs, **sub_kwargs)
    return create
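# Usage sketch (the "encoder" key and lstm kwargs are illustrative
# assumptions about the config schema, not taken from the repository):
#
#     create_encoder = config.get_module_cls(
#         "encoder", kwargs_map={"lstm": {"num_layers": 2}})
#     encoder = create_encoder(input_dim=300)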
def save_template(args):
    pkg = utils.import_module(f"model.{args.package}")
    if args.module_name is not None:
        clsmap = manager.get_module_namemap(pkg)
        cls = clsmap.get(args.module_name)
    else:
        # Default to the first registered module class in the package.
        cls = manager.get_module_classes(pkg)[0]
    template = {
        "type": cls.name,
        "vargs": model.get_optarg_template(cls)
    }
    dump = utils.map_val(args.format, {
        "yaml": utils.dump_yaml,
        "json": json.dump
    }, "template format")
    with open(args.save_path, "w") as f:
        dump(template, f)
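# Sketch of a saved template in YAML form. Only the "type" and "vargs"
# keys are fixed by save_template above; the module name and option names
# below are illustrative assumptions:
#
#     type: lstm-encoder
#     vargs:
#       hidden_dim: null
#       num_layers: null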
def get_embeddings(args, vocab):
    return utils.map_val(
        args.embed_type, {
            # Wrapped in a lambda (and called after dispatch) so the
            # embeddings are only constructed when this entry is actually
            # selected, matching the other get_embeddings variants above.
            "glove-format": lambda: GloveFormatEmbeddings(
                path=args.embed_path, words=set(vocab.f2i))
        }, "embedding type")()
def generate(args):
    devices = utils.get_devices(args.gpu)
    if args.seed is not None:
        utils.manual_seed(args.seed)

    logging.info("Loading data...")
    vocab_paths = [args.word_vocab, args.label_vocab, args.intent_vocab]
    vocabs = [utils.load_pkl(v) for v in vocab_paths]
    dataloader = None

    logging.info("Initializing generation environment...")
    model, vocabs[0] = prepare_model(args, vocabs)
    model = utils.to_device(model, devices)
    encoder = encode.Encoder(model=model,
                             device=devices[0],
                             batch_size=args.batch_size)
    generator = Generator(model=model,
                          device=devices[0],
                          batch_size=args.batch_size,
                          sent_vocab=vocabs[0],
                          label_vocab=vocabs[1],
                          intent_vocab=vocabs[2],
                          bos=args.bos,
                          eos=args.eos,
                          unk=args.unk,
                          max_len=args.max_length,
                          beam_size=args.beam_size,
                          beam_topk=args.beam_sample_topk,
                          validate=args.validate)

    logging.info("Commencing generation...")
    # The dataloader is created lazily: only the posterior and uniform
    # samplers need the training data encoded first.
    if args.generation_type in {"posterior", "uniform"}:
        if dataloader is None:
            dataloader = create_dataloader(args, vocabs)
    sampler = utils.map_val(args.generation_type, {
        "gaussian": lambda: None,
        "posterior": lambda: MultivariateGaussianMixtureSampler(
            *encoder.encode(dataloader),
            scale=args.posterior_sampling_scale),
        "uniform": lambda: UniformNoiseSampler(
            encoder.encode(dataloader)[0],
            pa=args.uniform_sampling_pa,
            pm=args.uniform_sampling_pm)
    }, name="sampler")()
    with torch.no_grad():
        gens, probs = generator.generate(args.samples, sampler)

    if args.nearest_neighbors is not None:
        if dataloader is None:
            dataloader = create_dataloader(args, vocabs)
        sents = [data["string"][0] for data in dataloader.dataset]
        searcher = neighbor.PyTorchPCASearcher(
            pca_dim=100,
            sents=sents,
            num_neighbors=args.nearest_neighbors,
            batch_size=args.nearest_neighbors_batch_size,
            device=devices[0])
        neighbors = searcher.search(gens[0])
    else:
        neighbors = None

    report_stats(args, gens[0], neighbors)
    save(args, gens, probs, neighbors)
    logging.info("Done!")
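# Generation modes dispatched above (the "gaussian" case is inferred:
# passing sampler=None presumably makes the generator draw latent codes
# from the model's own prior):
#   - "gaussian":  no explicit sampler.
#   - "posterior": fit a multivariate Gaussian mixture to the encoded
#                  dataset codes, scaled by args.posterior_sampling_scale.
#   - "uniform":   perturb the encoded codes with uniform noise controlled
#                  by args.uniform_sampling_pa / args.uniform_sampling_pm.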