def test_multiple_models(self, dataset, model, synthesizer):
    with tempfile.TemporaryDirectory() as tmpdir:
        input = os.path.join(tmpdir, "input")
        output = os.path.join(tmpdir, "output")
        os.makedirs(input)
        os.makedirs(os.path.join(input, "model"))
        # Save two model checkpoints with different scores under input/model/;
        # evaluate should process both.
        torch.save({"score": 0.5, "model": {"score": 0.5, "name": "tmp"}},
                   os.path.join(input, "model", "0"))
        torch.save({"score": 1.0, "model": {"score": 1.0, "name": "tmp"}},
                   os.path.join(input, "model", "1"))
        evaluate(input, output, dataset, model, synthesizer, {
            "accuracy": use_environment(
                Accuracy(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
            "bleu": use_environment(
                Bleu(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
        })
        assert os.path.exists(os.path.join(output, "result.pt"))
        assert os.path.exists(os.path.join(output, "result_metrics.json"))
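# `use_environment` (used throughout these tests) adapts a bare metric so it
# can read its arguments out of an Environment. Judging from the call sites,
# in_keys=["actual", ["ground_truth", "expected"]] passes "actual" through
# as-is and renames "ground_truth" to "expected", while value_key names the
# keyword that receives the candidate being scored. A rough, illustrative
# sketch of that adaptation; this is an assumption from the call sites, not
# mlprogram's actual implementation:
def use_environment_sketch(metric, in_keys, value_key):
    def wrapped(env, value):
        kwargs = {}
        for key in in_keys:
            # A two-element list means [source key in env, keyword for metric].
            src, dst = key if isinstance(key, list) else (key, key)
            if src in env:
                kwargs[dst] = env[src]
        kwargs[value_key] = value  # the synthesized candidate under test
        return metric(**kwargs)
    return wrapped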
def test_simple_case(self):
    acc = Accuracy()
    assert np.allclose(1.0, acc(expected="str", actual="str"))
    assert np.allclose(0.0, acc(expected="int", actual="str"))
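# A minimal sketch of the exact-match behavior the assertions above check:
# 1.0 when the candidate equals the expected value, 0.0 otherwise. This is
# an illustration of the contract, not mlprogram's Accuracy implementation.
class ExactMatchAccuracy:
    def __call__(self, expected, actual):
        return 1.0 if expected == actual else 0.0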
def test_multiprocess(self):
    accuracy = use_environment(
        Accuracy(),
        in_keys=["actual", ["ground_truth", "expected"]],
        value_key="actual",
    )
    dataset = ListDataset([
        Environment(
            {"query": "query0", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
        Environment(
            {"query": "query1", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
        Environment(
            {"query": "query2", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
    ])
    with tempfile.TemporaryDirectory() as init_dir:
        with context.Pool(2) as pool:
            procs = []
            for i in range(2):
                p = pool.apply_async(
                    self._run,
                    args=(init_dir, dataset, {"accuracy": accuracy}, i),
                )
                procs.append(p)
            out = [p.get() for p in procs]
    r0 = out[0]
    r1 = out[1]
    # Both ranks must see the same aggregated results.
    assert r0 == r1
    results = r0
    assert results.metrics == {1: {"accuracy": 1.0 / 3},
                               3: {"accuracy": 2.0 / 3}}
    assert 3 == len(results.results)
    # Timing is nondeterministic; zero it out before comparing Result objects.
    results.results[0].time = 0.0
    results.results[1].time = 0.0
    results.results[2].time = 0.0
    results.results.sort(key=lambda x: x.sample["query"])
    assert Result({"query": "query0", "ground_truth": "c0"},
                  ["c0", "c1", "c2"],
                  {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}},
                  True, 0.0) == results.results[0]
    assert Result({"query": "query1", "ground_truth": "c0"},
                  ["c2", "c3", "c0"],
                  {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}},
                  True, 0.0) == results.results[1]
    assert Result({"query": "query2", "ground_truth": "c0"},
                  ["c2", "c3", "c5"],
                  {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}},
                  True, 0.0) == results.results[2]
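# `self._run` for the multiprocess test is not shown in this snippet. Judging
# from the call site, a hypothetical sketch: each worker joins the two-process
# group rooted at init_dir, evaluates the shared dataset, and returns the
# aggregated results (identical on both ranks). `synthesize` is assumed to be
# the same stub the single-process test uses.
def _run_sketch(init_dir, dataset, metrics, rank):
    distributed.initialize(init_dir, rank, 2)
    return EvaluateSynthesizer(dataset, synthesize, metrics=metrics)()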
def train(self, output_dir):
    with tempfile.TemporaryDirectory() as tmpdir:
        # Compute the action-sequence loss from the model outputs, then pick
        # it out as the scalar to optimize.
        loss_fn = nn.Sequential(OrderedDict([
            ("loss", Apply(
                module=Loss(),
                in_keys=[
                    "rule_probs",
                    "token_probs",
                    "reference_probs",
                    "ground_truth_actions",
                ],
                out_key="action_sequence_loss",
            )),
            ("pick", mlprogram.nn.Function(Pick("action_sequence_loss"))),
        ]))
        collate = Collate(
            word_nl_query=CollateOptions(True, 0, -1),
            nl_query_features=CollateOptions(True, 0, -1),
            reference_features=CollateOptions(True, 0, -1),
            actions=CollateOptions(True, 0, -1),
            previous_actions=CollateOptions(True, 0, -1),
            previous_action_rules=CollateOptions(True, 0, -1),
            history=CollateOptions(False, 1, 0),
            hidden_state=CollateOptions(False, 0, 0),
            state=CollateOptions(False, 0, 0),
            ground_truth_actions=CollateOptions(True, 0, -1),
        ).collate
        qencoder, aencoder = self.prepare_encoder(train_dataset, Parser())
        transform = Map(self.transform_cls(qencoder, aencoder, Parser()))
        model = self.prepare_model(qencoder, aencoder)
        optimizer = self.prepare_optimizer(model)
        # Train for 100 epochs, evaluating and snapshotting at each
        # evaluation interval, keyed on accuracy@5.
        train_supervised(
            tmpdir, output_dir,
            train_dataset, model, optimizer, loss_fn,
            EvaluateSynthesizer(
                test_dataset,
                self.prepare_synthesizer(model, qencoder, aencoder),
                {"accuracy": Accuracy()},
                top_n=[5],
            ),
            "accuracy@5",
            lambda x: collate(transform(x)),
            1, Epoch(100),
            evaluation_interval=Epoch(100),
            snapshot_interval=Epoch(100),
            threshold=1.0,
        )
    return qencoder, aencoder
def _run(self, init_dir, input, output, model, synthesizer, dataset, rank):
    # Each worker joins a process group of size 2, then runs the shared
    # distributed evaluation.
    distributed.initialize(init_dir, rank, 2)
    evaluate(
        input, output, dataset, model, synthesizer,
        {
            "accuracy": use_environment(
                Accuracy(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
            "bleu": use_environment(
                Bleu(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
        },
    )
def evaluate(self, qencoder, aencoder, dir):
    model = self.prepare_model(qencoder, aencoder)
    # `eval` is assumed to be the `evaluate` entrypoint imported under an
    # alias (this method shares the name); the builtin eval() does not
    # accept these arguments.
    eval(
        dir, dir, test_dataset, model,
        self.prepare_synthesizer(model, qencoder, aencoder),
        {
            "accuracy": use_environment(
                metric=Accuracy(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
        },
        top_n=[5],
    )
    return torch.load(os.path.join(dir, "result.pt"))
def test_simple_case(self):
    accuracy = use_environment(
        Accuracy(),
        in_keys=["actual", ["ground_truth", "expected"]],
        value_key="actual",
    )
    dataset = ListDataset([
        Environment(
            {"query": "query0", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
        Environment(
            {"query": "query1", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
        Environment(
            {"query": "query2", "ground_truth": "c0"},
            set(["ground_truth"]),
        ),
    ])
    results = EvaluateSynthesizer(dataset, synthesize,
                                  metrics={"accuracy": accuracy})()
    assert results.metrics == {1: {"accuracy": 1.0 / 3.0},
                               3: {"accuracy": 2.0 / 3.0}}
    assert 3 == len(results.results)
    # Timing is nondeterministic; zero it out before comparing Result objects.
    results.results[0].time = 0.0
    results.results[1].time = 0.0
    results.results[2].time = 0.0
    assert Result({"query": "query0", "ground_truth": "c0"},
                  ["c0", "c1", "c2"],
                  {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}},
                  True, 0.0) == results.results[0]
    assert Result({"query": "query1", "ground_truth": "c0"},
                  ["c2", "c3", "c0"],
                  {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}},
                  True, 0.0) == results.results[1]
    assert Result({"query": "query2", "ground_truth": "c0"},
                  ["c2", "c3", "c5"],
                  {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}},
                  True, 0.0) == results.results[2]
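# `synthesize` is not defined in this snippet. A deterministic stub that
# would reproduce the candidate lists asserted above could look like this
# (hypothetical; the real synthesizer interface may wrap each candidate in
# a result object rather than yielding raw strings):
def synthesize_stub(env):
    candidates = {
        "query0": ["c0", "c1", "c2"],
        "query1": ["c2", "c3", "c0"],
        "query2": ["c2", "c3", "c5"],
    }
    yield from candidates[env["query"]]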