# Reproducibility: seed every RNG source (Python, NumPy, PyTorch) identically.
seed = 2233235
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

#  CONFIG SECTION
lang = sys.argv[1]        # target language code, taken from the command line
expname = "exp1_long"     # experiment name; becomes part of the model output path
batch_size = 32

# how many overall iterations.
iterations = 10

# Unpack everything the data loader hands back for this language.
data_dict = get_data(lang)
reader = data_dict["reader"]
vocab = data_dict["vocab"]
train_dataset = data_dict["train"]
validation_dataset = data_dict["dev"]
test_dataset = data_dict["test"]
WORD_EMB_DIM = data_dict["WORD_EMB_DIM"]
pretrained_file = data_dict["pretrained_file"]

print("Train data stats:")
stats = get_stats(train_dataset)
print("total toks: ", stats["total_toks"])
print("total tags: ", stats["total_tags"])

# don't put a slash after this.
serialization_dir = f"/scratch/models/{lang}/{expname}"
# NOTE(review): this section repeats the seeding/config boilerplate from the top
# of the file (same seed, re-reads sys.argv[1]) — looks like two experiment
# scripts concatenated; confirm the duplication is intentional before cleaning up.
seed = 2233235
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

#  CONFIG SECTION
lang = sys.argv[1]     # target language code, taken from the command line
expname = "oracle"     # experiment name; identifies the oracle run
batch_size = 32

# how many overall iterations.
iterations = 5

# Unpack everything the data loader hands back for this language.
data_dict = get_data(lang)
reader = data_dict["reader"]
vocab = data_dict["vocab"]
train_dataset = data_dict["train"]
validation_dataset = data_dict["dev"]
test_dataset = data_dict["test"]
WORD_EMB_DIM = data_dict["WORD_EMB_DIM"]
pretrained_file = data_dict["pretrained_file"]

# HACK: recall=0.999 (rather than 1.0) to get a near-gold "oracle" training set;
# presumably avoids some degenerate case in the loader — confirm with get_data.
data_dict = get_data(lang, recall=0.999)
gold_train_dataset = data_dict["train"]

# Size of the tag inventory, as enumerated by the dataset reader.
num_tags = len(reader.alltags)

# This block sets all false negative values in the training data to have a uniform tag matrix.