def get_parameters(opt, exp_type="model"):
    params = DD()
    params.net = DD()

    params.mle = 0
    params.dataset = opt.dataset

    params.net = get_net_parameters(opt)
    params.train = get_training_parameters(opt)

    params.model = params.net.model
    params.exp = opt.exp

    params.data = get_data_parameters(opt, params.exp, params.dataset)
    params.eval = get_eval_parameters(opt, params.data.get("categories", None))

    # params.n_per_node = opt.n_per_node
    # params.max_path_len = opt.max_path_len
    # params.n_train = opt.n_train
    # params.n_dev = opt.n_dev
    # params.n_test = opt.n_test

    meta = DD()

    params.trainer = opt.trainer

    meta.iterations = int(opt.iterations)
    meta.cycle = opt.cycle
    params.cycle = opt.cycle
    params.iters = int(opt.iterations)

    global toy
    toy = opt.toy

    global do_gen
    do_gen = opt.do_gen

    global save
    save = opt.save

    global test_save
    test_save = opt.test_save

    global save_strategy
    save_strategy = opt.save_strategy

    print(params)

    return params, meta
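# Hedged sketch, not repo code: get_parameters builds its config tree out of
# DD objects imported from utils.utils. Assuming DD is a plain
# attribute-access dict (which is all the usage above relies on), a minimal
# stand-in would look like this; the real class may also customize __repr__
# and pickling behavior.
class _DDSketch(dict):
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value


# Hypothetical opt covering the fields get_parameters reads directly; the
# values here are illustrative placeholders, not defaults from the repo.
_opt = _DDSketch()
_opt.dataset = "atomic"
_opt.exp = "generation"
_opt.trainer = "iteration"
_opt.iterations = 50000
_opt.cycle = 500
_opt.toy = False
_opt.do_gen = False
_opt.save = True
_opt.test_save = False
_opt.save_strategy = "best"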
        data_params[case.split("_")[0]] = case.split("_")[1]
    return data_params


gens_file = args.gens_file
split = gens_file.split("/")[-1].split(".")[0]
n = args.n


def flatten(outer):
    return [el for key in outer for el in key]


opt = DD()
opt.data = DD()

opt.dataset = "atomic"
opt.exp = "generation"

data_params = get_data_params(gens_file)
categories = data_params["categories"]
# sorted(["oReact", "oEffect", "oWant", "xAttr", "xEffect",
#         "xIntent", "xNeed", "xReact", "xWant"])
opt.data.categories = data_params["categories"]

if "maxe1" in data_params:
    opt.data.maxe1 = data_params["maxe1"]
    opt.data.maxe2 = data_params["maxe2"]
    opt.data.maxr = data_params["maxr"]

path = "data/atomic/processed/generation/{}.pickle".format(
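# Hedged sketch, not repo code: the truncated get_data_params above appears
# to recover data hyperparameters that were serialized into the generations
# filename as underscore-joined key_value pairs. The "-" separator and the
# example filename below are assumptions for illustration; the real repo
# builds names with utils.make_name_string, and "categories" is parsed into
# a list rather than kept as a string.
def _get_data_params_sketch(gens_file):
    data_params = {}
    name = gens_file.split("/")[-1].split(".")[0]
    for case in name.split("-"):
        data_params[case.split("_")[0]] = case.split("_")[1]
    return data_params


# Example with a hypothetical filename encoding two parameters:
# _get_data_params_sketch("gens/maxe1_17-maxr_5.pickle")
# -> {"maxe1": "17", "maxr": "5"}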
import os
import sys

sys.path.append(os.getcwd())

import torch

import src.data.conceptnet as cdata
import src.data.data as data
from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm

opt = DD()
opt.dataset = "conceptnet"
opt.exp = "generation"

opt.data = DD()

# How to represent relations:
# set to "language" to split each relation into its component words
# and encode them with the text vocabulary;
# set to "relation" to give each relation a single unlearned
# relation embedding instead.
opt.data.rel = "language"

# Use the 100k training set
opt.data.trainsize = 100

# Use both dev sets (v1 and v2)
opt.data.devversion = "12"
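# Illustrative sketch, not repo code: what the two opt.data.rel settings
# above mean for a ConceptNet relation such as "HasSubevent". The token
# strings here are hypothetical.
_rel = "HasSubevent"

if opt.data.rel == "language":
    # Relation split into its component words, encoded with the shared
    # BPE text vocabulary.
    _rel_tokens = ["has", "subevent"]
else:  # opt.data.rel == "relation"
    # One dedicated symbol per relation, added to the vocabulary with a
    # fresh, initially unlearned embedding.
    _rel_tokens = ["<HasSubevent>"]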
import src.data.data as data
from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm
import torch

# Manually edit this list to control which annotation categories are
# included, e.g., if you want to train on a separate subset of categories.
categories = []
categories += ["Intent"]

opt = DD()
opt.dataset = "motiv_sent"
opt.exp = "generation"

opt.data = DD()
opt.data.categories = sorted(categories)

encoder_path = "model/encoder_bpe_40000.json"
bpe_path = "model/vocab_40000.bpe"

text_encoder = TextEncoder(encoder_path, bpe_path)
encoder = text_encoder.encoder
n_vocab = len(text_encoder.encoder)

special = [data.start_token, data.end_token]
special += ["<{}>".format(cat) for cat in categories]
special += [data.blank_token]
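# For categories == ["Intent"], the list built above is
# [data.start_token, data.end_token, "<Intent>", data.blank_token].
# A hedged sketch of the usual next step in this repo's data scripts
# (an assumption here, not confirmed by the excerpt): register each special
# token with the BPE encoder and decoder so it receives a fresh vocabulary
# id beyond the original n_vocab entries.
for special_token in special:
    text_encoder.decoder[len(encoder)] = special_token
    encoder[special_token] = len(encoder)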