# --- Reproducibility --------------------------------------------------------
# Pin every RNG in play (stdlib, numpy, torch) so runs are repeatable.
seed = 2233235
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- CONFIG SECTION ---------------------------------------------------------
lang = sys.argv[1]        # target language code, given on the command line
expname = "exp1_long"
batch_size = 32
# how many overall iterations.
iterations = 10

# A bunch of parameters from the data.
# NOTE(review): get_data is a project helper; assumed to return the keys
# unpacked below — confirm against its definition.
data_dict = get_data(lang)
train_dataset, validation_dataset, test_dataset = (
    data_dict[split] for split in ("train", "dev", "test")
)
reader = data_dict["reader"]
WORD_EMB_DIM = data_dict["WORD_EMB_DIM"]
pretrained_file = data_dict["pretrained_file"]
vocab = data_dict["vocab"]

# Quick sanity print of the training-set size before anything heavy runs.
print("Train data stats:")
stats = get_stats(train_dataset)
print("total toks: ", stats["total_toks"])
print("total tags: ", stats["total_tags"])

# don't put a slash after this.
serialization_dir = "/scratch/models/{}/{}".format(lang, expname)
# --- Reproducibility --------------------------------------------------------
# Pin every RNG in play (stdlib, numpy, torch) so runs are repeatable.
seed = 2233235
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- CONFIG SECTION ---------------------------------------------------------
lang = sys.argv[1]        # target language code, given on the command line
expname = "oracle"
batch_size = 32
# how many overall iterations.
iterations = 5

# A bunch of parameters from the data.
# NOTE(review): get_data is a project helper; assumed to return the keys
# unpacked below — confirm against its definition.
data_dict = get_data(lang)
train_dataset, validation_dataset, test_dataset = (
    data_dict[split] for split in ("train", "dev", "test")
)
reader = data_dict["reader"]
WORD_EMB_DIM = data_dict["WORD_EMB_DIM"]
pretrained_file = data_dict["pretrained_file"]
vocab = data_dict["vocab"]

# Second load at near-perfect recall to obtain an (oracle) gold training set.
# this 0.999 thing is a hack.
data_dict = get_data(lang, recall=0.999)
gold_train_dataset = data_dict["train"]

num_tags = len(reader.alltags)

# This block sets all false negative values in the training data to have a uniform tag matrix.