Example #1
def copy_config(self):
    '''
    Return a copy of the config for use in different grid search
    configurations. Note: this is a shallow copy, so mutable attribute
    values are shared with the original.
    '''
    c = Config()
    c.__dict__ = self.__dict__.copy()
    return c
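
A minimal usage sketch, not from the source: a grid search clones the base config and overrides one field per run. Every name here except Config and copy_config is hypothetical.

# Usage sketch (assumption): clone, tweak one hyperparameter, train.
base = Config("config.json")        # assumes Config loads a JSON file
for lr in [1e-4, 3e-4, 1e-3]:       # hypothetical search space
    c = base.copy_config()
    c.learning_rate = lr            # hypothetical attribute name
    run_one(c)                      # hypothetical training entry point
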
Example #2
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
        optimizer.step()

        # Periodically evaluate on the dev set (no checkpoint is written here)
        if train_num_batches > 0 and train_num_batches % config.eval_every_minibatch == 0:
            model.eval()
            dev_evaluator.evaluate(model, train_num_batches, float(loss))

        if train_num_batches > config.num_batches:
            break


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config_file", required=True)
    parser.add_argument("-g", "--grid_search", default="False")
    args = parser.parse_args()

    # Set up the config
    config = Config(args.config_file)

    # For non grid search, must set up exp dir
    if args.grid_search == "False":
        exp_dir = make_exp_dir(config)
        copytree(os.path.join(os.environ['SED_ROOT'], 'src'),
                 os.path.join(exp_dir, 'src'))
        config.save_config(exp_dir)
    else:
        exp_dir = os.path.split(args.config_file)[0]

    train_model(config, exp_dir)
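
These scripts only require that Config read a JSON file into attributes and write itself back out. A minimal sketch consistent with that usage follows; it is an assumption, not the repo's actual class.

import json
import os


class Config:
    """Sketch of the Config these scripts assume: a flat JSON file
    loaded into instance attributes (the real class may differ)."""

    def __init__(self, config_file=None):
        if config_file is not None:
            with open(config_file) as f:
                self.__dict__.update(json.load(f))

    def save_config(self, exp_dir):
        # Write the current attributes back out as config.json
        with open(os.path.join(exp_dir, "config.json"), "w") as f:
            json.dump(self.__dict__, f, indent=2)
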
Example #3
        value = sdtw.compute()  # soft-DTW value; compute() also fills the tables grad() reads below
        dtw_mat[i] = torch.from_numpy(sdtw.grad())

    return dtw_mat

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--test_file", required=True)
    parser.add_argument("-s", "--stance_exp_dir", required=True)
    parser.add_argument("-d", "--dtw_exp_dir", required=True)    
    parser.add_argument("-o", "--output_folder", required = True)

    args = parser.parse_args()
    output_folder = args.output_folder

    config = Config(os.path.join(args.stance_exp_dir, "config.json"))
    tokenizer, max_len_token = get_tokenizer(config)
    vocab = get_vocab(config, tokenizer, max_len_token)
    qry_tk, cnd_tk, qry_len, cnd_len = load_data(args.test_file, vocab, config, tokenizer, max_len_token)

    stance_sim, stance_dist, stance_pi, stance_mulpld = print_stance_mm(args.stance_exp_dir, qry_tk, cnd_tk)
    dtw_mat = print_dtw_mm(args.dtw_exp_dir, qry_tk, cnd_tk)

    # Move every score matrix to numpy on the CPU
    stance_sim = stance_sim.cpu().data.numpy()
    stance_dist = stance_dist.cpu().data.numpy()
    stance_pi = stance_pi.cpu().data.numpy()
    stance_mulpld = stance_mulpld.cpu().data.numpy()
    dtw_mat = dtw_mat.cpu().data.numpy()
    min_sim = np.min(stance_sim)
    max_sim = np.max(stance_sim)
    min_pi = np.min(stance_pi)
    max_pi = np.max(stance_pi)
    min_dist = np.min(stance_dist)
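
The snippet is truncated right after these min/max computations. A hedged guess at the continuation, purely an assumption: min-max scale each matrix to [0, 1] and save it to output_folder (filenames hypothetical).

# Assumed continuation (not in the source): min-max scaling to [0, 1].
norm_sim = (stance_sim - min_sim) / (max_sim - min_sim)
norm_pi = (stance_pi - min_pi) / (max_pi - min_pi)
np.save(os.path.join(output_folder, "stance_sim.npy"), norm_sim)  # hypothetical filename
np.save(os.path.join(output_folder, "stance_pi.npy"), norm_pi)    # hypothetical filename
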
Example #4
        labeled_file=test_label_file, output_file=test_output_file)
    model = torch.load(os.path.join(exp_dir, "best_model"))

    model.eval()
    test_evaluator.evaluate(model, train_num_batches=-1, train_loss=-1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config_dir", required=True)
    parser.add_argument("-p", "--is_parallel", required=True)
    parser.add_argument("-s", "--shard")
    args = parser.parse_args()

    config_file = os.path.join(args.config_dir, "config.json")
    config = Config(config_file)

    # is_parallel arrives as the literal string "True" or "False"
    if args.is_parallel == "True":
        data_dir = os.path.split(config.test_file)[0]
        test_output_dir = os.path.join(args.config_dir, "test_shards")
        if not os.path.exists(test_output_dir):
            os.makedirs(test_output_dir)
        test_output_file = os.path.join(test_output_dir,
                                        "shard_" + args.shard + ".pred")
        test_label_file = os.path.join(data_dir, "test_shards",
                                       "shard_" + args.shard)

    else:
        test_output_file = os.path.join(args.config_dir, "test.pred")
        test_label_file = config.test_file
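
In parallel mode each invocation writes one shard_<n>.pred; merging the shards back together is not shown in the snippet. One way to do it, with a helper name and merge step that are assumptions:

import glob
import os


def merge_shards(test_output_dir, merged_file):
    # Hypothetical helper: concatenate per-shard predictions in numeric
    # order so shard_10 follows shard_9 rather than shard_1.
    shards = glob.glob(os.path.join(test_output_dir, "shard_*.pred"))
    shards.sort(key=lambda p: int(os.path.basename(p)[len("shard_"):-len(".pred")]))
    with open(merged_file, "w") as out:
        for shard in shards:
            with open(shard) as f:
                out.write(f.read())
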
Example #5
        optimizer.step()

        if train_num_batches == config.num_minibatches:
            break
        # Periodically evaluate on the dev set
        if train_num_batches > 0 and train_num_batches % config.eval_every_minibatch == 0:
            model.eval()
            dev_evaluator.evaluate(model, train_num_batches)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config_file", required=True)
    parser.add_argument("-g", "--grid_search", default="False")
    args = parser.parse_args()

    # Set up the config
    config = Config(args.config_file)
    config.update_dataset()

    # For non grid search, must set up exp dir
    if args.grid_search == "False":
        exp_dir = make_exp_dir(config.dataset_name, config.model_name,
                               config.tokenizer_name)
        copytree(os.path.join(os.environ['SED_ROOT'], 'src'),
                 os.path.join(exp_dir, 'src'))
        config.save_config(exp_dir)
    else:
        exp_dir = os.path.split(args.config_file)[0]

    train_model(config, exp_dir)
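
This version passes name parts to make_exp_dir, while Example #2 passes a whole Config. A minimal sketch matching this version's call, with a directory layout that is an assumption:

import datetime
import os


def make_exp_dir(*name_parts):
    # Hypothetical layout: experiments/<parts>/<timestamp>; the repo's
    # real helper may differ.
    stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    exp_dir = os.path.join("experiments", *[str(p) for p in name_parts], stamp)
    os.makedirs(exp_dir)
    return exp_dir
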
Example #6
import argparse
import os

# Config and Evaluator are also used below; their imports are elided in the
# original snippet.
from main.objects.DataExtractor import DataExtractor
from main.objects.Tokenizer import Tokenizer

from main.utils.model_helper import get_baseline
from main.utils.util import make_exp_dir


def test_model(config, exp_dir, test_label_file=None, test_output_file=None):
    test_evaluator = Evaluator(config, 'test', exp_dir, list_k=[1, 10, 50, 100],
                               labeled_file=test_label_file, output_file=test_output_file)

    data_extractor = DataExtractor(config)
    tokenizer = Tokenizer(config)
    model = get_baseline(config, data_extractor, tokenizer)

    test_evaluator.evaluate(model, train_num_batches=-1, train_loss=-1)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config_file", required=True)
    parser.add_argument("-f", "--fold")
    args = parser.parse_args()

    config = Config(args.config_file)
    exp_dir = os.path.dirname(args.config_file)

    test_output_file = os.path.join(exp_dir, "test_fold_%d.pred" % int(args.fold))
    test_label_file = os.path.join("data", "cross_validation", "fold_%d" % int(args.fold), "test.data")

    test_model(config, exp_dir, test_label_file, test_output_file)
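
To evaluate every fold rather than one, the same entry point can be looped; the fold count here is an assumption:

# Hypothetical driver: run test_model over all cross-validation folds.
for fold in range(5):  # fold count assumed
    pred = os.path.join(exp_dir, "test_fold_%d.pred" % fold)
    labels = os.path.join("data", "cross_validation", "fold_%d" % fold, "test.data")
    test_model(config, exp_dir, labels, pred)
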