Exemplo n.º 1
0
def read_config(file_):
    """Recursively convert a parsed config mapping into a DD, coercing
    string booleans to real bools.

    The strings "True"/"T"/"true" become True and "False"/"F"/"false"
    become False; nested dicts are converted to nested DDs; everything
    else is copied through unchanged.

    Parameters
    ----------
    file_ : dict
        Mapping of option name -> raw value.

    Returns
    -------
    DD
        Attribute-accessible copy of *file_* with booleans coerced.
    """
    config = DD()
    # NOTE(fix): removed the leftover debug ``print(file_)`` the
    # original carried.
    for k, v in file_.items():
        # Tuple membership instead of chained ==; a tuple (not a set)
        # so unhashable values like dicts compare safely.
        if v in ("True", "T", "true"):
            config[k] = True
        elif v in ("False", "F", "false"):
            config[k] = False
        elif isinstance(v, dict):
            # Recurse so nested sections are attribute-accessible too.
            config[k] = read_config(v)
        else:
            config[k] = v

    return config
Exemplo n.º 2
0
def get_data_parameters(opt, experiment, dataset):
    """Build the data-configuration DD for the given dataset.

    ``experiment`` is accepted for interface compatibility but is not
    read here.
    """
    data = DD()

    if dataset == "atomic":
        data.categories = sorted(opt.categories)
        # Hard-coded sequence limits for ATOMIC.
        data.maxe1 = 17
        data.maxe2 = 35
        data.maxr = 1
    elif dataset == "conceptnet":
        data.rel = opt.relation_format
        data.trainsize = opt.training_set_size
        data.devversion = opt.development_set_versions_to_use
        data.maxe1 = opt.max_event_1_size
        data.maxe2 = opt.max_event_2_size
        # Hard-coded: relations spelled out as language need up to 5
        # tokens; a single relation embedding needs only 1.
        data.maxr = 5 if data.rel == "language" else 1

    return data
Exemplo n.º 3
0
def get_eval_parameters(opt, force_categories=None):
    """Assemble evaluation/generation settings into a DD.

    For the "atomic" dataset, a non-None *force_categories* overrides
    any ``opt.eval_categories``.
    """
    evaluate = DD()

    # Sampler-specific settings.
    sampler = opt.eval_sampler
    if sampler == "beam":
        evaluate.bs = opt.beam_size
    elif sampler == "greedy":
        evaluate.bs = 1
    elif sampler == "topk":
        evaluate.k = opt.topk_size

    evaluate.smax = opt.gen_seqlength
    evaluate.sample = sampler
    evaluate.numseq = opt.num_sequences
    evaluate.gs = opt.generate_sequences
    evaluate.es = opt.evaluate_sequences

    if opt.dataset == "atomic":
        if force_categories is None and "eval_categories" in opt:
            evaluate.categories = opt.eval_categories
        else:
            evaluate.categories = force_categories

    return evaluate
Exemplo n.º 4
0
def get_net_parameters(opt):
    """Collect network-architecture hyperparameters into a DD.

    Maps the verbose option names on *opt* onto the short attribute
    names the rest of the code expects.
    """
    # (short DD attribute, long opt attribute)
    fields = (
        ("model", "model"),
        ("nL", "num_layers"),
        ("nH", "num_heads"),
        ("hSize", "hidden_dim"),
        ("edpt", "embedding_dropout"),
        ("adpt", "attention_dropout"),
        ("rdpt", "residual_dropout"),
        ("odpt", "output_dropout"),
        ("pt", "pretrain"),
        ("afn", "activation"),
        # How to initialize parameters:
        #   "gauss+{}+{}".format(mean, std) for Gaussian init,
        #   "n" for PyTorch's default initialization.
        ("init", "init"),
    )

    net = DD()
    for short, long in fields:
        setattr(net, short, getattr(opt, long))
    return net
Exemplo n.º 5
0
def get_training_parameters(opt):
    """Split training options into a static DD (fixed for the run) and a
    dynamic DD (tunable), returned together under one ``train`` DD."""
    static = DD()
    static.exp = opt.exp
    static.seed = opt.random_seed
    static.l2 = opt.l2   # weight decay
    static.vl2 = True
    static.lrsched = opt.learning_rate_schedule  # e.g. 'warmup_linear'
    static.lrwarm = opt.learning_rate_warmup     # e.g. 0.002
    static.clip = opt.clip  # gradient clipping
    static.loss = opt.loss  # which loss function to use

    dynamic = DD()
    dynamic.lr = opt.learning_rate  # learning rate
    dynamic.bs = opt.batch_size     # batch size
    dynamic.optim = opt.optimizer   # optimizer name {adam, rmsprop, ...}

    # Merge in the optimizer-specific options (e.g. for rmsprop, alpha
    # is the interpolation-average coefficient).
    static.update(opt[dynamic.optim])

    train = DD()
    train.static = static
    train.dynamic = dynamic
    return train
Exemplo n.º 6
0
def get_parameters(opt, exp_type="model"):
    """Assemble the full parameter tree (net/train/data/eval) and run
    metadata from a flat option DD.

    Also publishes several run-wide flags (toy, do_gen, save, test_save,
    save_strategy) as module globals, preserving the original behavior.

    Parameters
    ----------
    opt : DD
        Flat option container.
    exp_type : str
        Accepted for interface compatibility; not read here.

    Returns
    -------
    (params, meta) : tuple of DD
    """
    params = DD()
    params.mle = 0
    params.dataset = opt.dataset

    # NOTE(fix): the original assigned ``params.net = DD()`` and then
    # immediately overwrote it here — the dead store is removed.
    params.net = get_net_parameters(opt)
    params.train = get_training_parameters(opt)

    params.model = params.net.model
    params.exp = opt.exp

    params.data = get_data_parameters(opt, params.exp, params.dataset)
    # "categories" only exists for the atomic dataset; pass None otherwise.
    params.eval = get_eval_parameters(opt, params.data.get("categories", None))

    params.trainer = opt.trainer
    params.cycle = opt.cycle
    params.iters = int(opt.iterations)

    meta = DD()
    meta.iterations = int(opt.iterations)
    meta.cycle = opt.cycle

    # Run-wide flags exposed as module globals, kept for compatibility
    # with callers that read them from this module.
    global toy, do_gen, save, test_save, save_strategy
    toy = opt.toy
    do_gen = opt.do_gen
    save = opt.save
    test_save = opt.test_save
    save_strategy = opt.save_strategy

    print(params)
    return params, meta
Exemplo n.º 7
0
import os
import sys

sys.path.append(os.getcwd())

import torch
import src.data.conceptnet as cdata
import src.data.data as data

from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm

# Script-level options for building the ConceptNet generation dataset.
opt = DD()
opt.dataset = "conceptnet"
opt.exp = "generation"

opt.data = DD()

# Use relation embeddings rather than splitting relations into their
# component words:
#   "language" -> use component words
#   "relation" -> use unlearned relation embeddings
opt.data.rel = "language"

# Use 100k training set
opt.data.trainsize = 100

# Use both dev sets (v1 and v2)
# NOTE(review): the assignment this comment documents falls outside
# this excerpt.
Exemplo n.º 8
0
            data_params[case.split("_")[0]] = case.split("_")[1].split("#")
        else:
            data_params[case.split("_")[0]] = case.split("_")[1]
    return data_params


# Path to the generations file; the split name (e.g. "dev", "test") is
# taken from the file's basename without its extension.
gens_file = args.gens_file
split = gens_file.split("/")[-1].split(".")[0]
n = args.n


def flatten(outer):
    """Flatten one level of nesting: concatenate the elements of every
    inner iterable in *outer* into a single list."""
    flat = []
    for inner in outer:
        flat.extend(inner)
    return flat


# Script-level options for ATOMIC generation evaluation.
opt = DD()
opt.data = DD()
opt.dataset = "atomic"
opt.exp = "generation"

# Data parameters (categories, max lengths) are encoded in the
# generations filename.
data_params = get_data_params(gens_file)

categories = data_params[
    "categories"]  # e.g. sorted(["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"])

opt.data.categories = data_params["categories"]

# Max-length settings are optional in the filename.
if "maxe1" in data_params:
    opt.data.maxe1 = data_params["maxe1"]
    opt.data.maxe2 = data_params["maxe2"]
    opt.data.maxr = data_params["maxr"]
Exemplo n.º 9
0
# Superseded by the 3-value (train, dev, test) form below:
#parser.add_argument("--n_per_node", type=int, default=3)
parser.add_argument("--n_per_node", type=int, nargs=3, default=[1, 1, 1], help='n per node for train, dev, test')
parser.add_argument("--comet", action='store_true', default=False, help='create data of the form s,r -> o, where sro are triples from ATOMIC')
parser.add_argument("--pathcomet", action='store_true', default=False, help='create data of the form path,s,r -> o, where sro are triples from ATOMIC')
# Superseded by the 3-value (train, dev, test) form below:
#parser.add_argument("--add_orig", action='store_true', default=False, help='when generating pathcomet, add the original comet triples as well (train)')
parser.add_argument("--add_orig", type=int, nargs=3, default=[0, 0, 0], help='when generating pathcomet, add the original comet triples as well (train)')

args = parser.parse_args()

print("\n### ATOMIC Data Generation ###")
pprint.pprint(vars(args))
print('\n')

# --comet and --pathcomet are mutually exclusive modes.
assert not (args.comet and args.pathcomet)

# Script-level options for ATOMIC data generation.
opt = DD()
opt.dataset = "atomic"
opt.exp = "generation"
opt.data = DD()
opt.data.categories = sorted(categories)

# Additionally add our options HERE
opt.pickled_data = args.pickled_data
opt.n_train = args.n_train
opt.n_dev = args.n_dev
opt.n_test = args.n_test
opt.max_path_len = args.max_path_len
opt.n_per_node = args.n_per_node
opt.comet = args.comet
opt.pathcomet = args.pathcomet
opt.add_orig = args.add_orig
Exemplo n.º 10
0
import src.data.data as data
from utils.utils import DD
import utils.utils as utils
import random
from src.data.utils import TextEncoder
from tqdm import tqdm
import torch

# Manually change the set of categories you don't want to include
# if you want to be able to train on a separate set of categories
categories = []
categories += ["Intent"]


# Script-level options for building the motiv_sent generation dataset.
opt = DD()
opt.dataset = "motiv_sent"
opt.exp = "generation"
opt.data = DD()
opt.data.categories = sorted(categories)

# BPE vocabulary files for the pretrained text encoder.
encoder_path = "model/encoder_bpe_40000.json"
bpe_path = "model/vocab_40000.bpe"

text_encoder = TextEncoder(encoder_path, bpe_path)

encoder = text_encoder.encoder
n_vocab = len(text_encoder.encoder)

# Special tokens: sequence start/end plus one "<Category>" token per category.
special = [data.start_token, data.end_token]
special += ["<{}>".format(cat) for cat in categories]
Exemplo n.º 11
0
def get_data_parameters(opt, experiment, dataset):
    """Build the data-configuration DD for the given dataset.

    ``opt.exp`` and ``opt.data`` are used elsewhere for the data-loader
    path; ``experiment`` is accepted for interface compatibility but is
    not read here.
    """
    data = DD()
    if dataset == "atomic":
        data.categories = sorted(opt.categories)
        # Hard-coded loader limits: maxe1 = max event length,
        # maxe2 = max effect length. The upstream defaults were
        # maxe1=17, maxe2=35, maxr=1; this fork raises maxe1 to 50.
        # NOTE(fix): the dead initial assignments (17/35/1, immediately
        # overwritten in the original) are removed.
        data.maxe1 = 50
        data.maxe2 = 35
        data.maxr = 1

    elif dataset == "conceptnet":
        data.rel = opt.relation_format
        data.trainsize = opt.training_set_size
        data.devversion = opt.development_set_versions_to_use
        data.maxe1 = opt.max_event_1_size
        data.maxe2 = opt.max_event_2_size
        if data.rel == "language":
            # Hard-coded: relations spelled out in words need up to 5 tokens.
            data.maxr = 5
        else:
            # Hard-coded: a single relation embedding.
            data.maxr = 1

    return data
Exemplo n.º 12
0
def load_default():
    """Load config_default.json and wrap every section in a DD so the
    configuration is attribute-accessible. Training starts at epoch 0."""
    with open(abs_path("config_default.json"), 'r') as handle:
        raw = json.load(handle)

    config = DD(raw)
    # Promote each top-level section from plain dict to DD.
    for section in ("net", "train", "data", "eval"):
        config[section] = DD(config[section])
    # The train section has two nested sub-sections.
    config.train.static = DD(config.train.static)
    config.train.dynamic = DD(config.train.dynamic)
    config.train.dynamic.epoch = 0
    return DD(config)
Exemplo n.º 13
0
def get_meta(config):
    """Extract run metadata (iteration count and cycle length) from a
    full configuration DD."""
    meta = DD()
    meta.cycle = config.cycle
    meta.iterations = int(config.iters)
    return meta