Пример #1
0
 """ CMU DICT """
 # Batch size and accepted sequence-length window; the target (y) bounds
 # simply reuse the source (X) bounds.
 batch_size = 64
 min_seq_len_X, max_seq_len_X = 3, 20
 min_seq_len_y, max_seq_len_y = min_seq_len_X, max_seq_len_X

 # Every path below hangs off the same relative cmudict directory.
 cmudict_root = os.path.join("..", "..", "data", "cmudict")
 gpt2_lookup_root = os.path.join(cmudict_root, "lookup", "gpt2")
 data_folder = os.path.join(cmudict_root, "ready", "gpt2")
 src_lookup_prefix = os.path.join(gpt2_lookup_root, "src")
 tgt_lookup_prefix = os.path.join(gpt2_lookup_root, "tgt")

 # Create the source and target lookups (type "gpt2") and call load() on
 # each with its own prefix.
 src_lookup = Lookup(type="gpt2")
 src_lookup.load(src_lookup_prefix)
 tgt_lookup = Lookup(type="gpt2")
 tgt_lookup.load(tgt_lookup_prefix)

 # loader() hands back the train / validation / test loaders, in that order.
 train_loader, valid_loader, test_loader = loader(
     data_folder,
     batch_size,
     src_lookup,
     tgt_lookup,
     min_seq_len_X,
     max_seq_len_X,
     min_seq_len_y,
     max_seq_len_y,
 )

 # Report the size of each split's X collection plus both lookup sizes.
 split_sizes = [
     len(train_loader.dataset.X),
     len(valid_loader.dataset.X),
     len(test_loader.dataset.X),
 ]
 report_values = split_sizes + [len(src_lookup), len(tgt_lookup)]
 print("Loading done, train instances {}, dev instances {}, test instances {}, vocab size src/tgt {}/{}\n".format(*report_values))
 # ######################################################################

 # GPU SELECTION ########################################################
 device = select_processing_device(verbose=True)
 # ######################################################################

 # MODEL TRAINING #######################################################

 # presumably the weight applied to an auxiliary loss term -- its consumer
 # is outside this fragment; confirm where it is used.
 aux_loss_weight = 0.1
Пример #2
0
 
 # Disabled alternative: the GPT-2 flavour of the task2 data and lookups.
 #data_folder = os.path.join("..", "..", "data", "task2", "ready", "gpt2")
 #src_lookup_prefix = os.path.join("..", "..", "data", "task2", "lookup", "gpt2","src")
 #tgt_lookup_prefix = os.path.join("..", "..", "data", "task2", "lookup", "gpt2","tgt")
 #src_lookup = Lookup(type="gpt2")
 #tgt_lookup = Lookup(type="gpt2")

 # Active configuration: BPE data and lookups for task2.
 task2_root = os.path.join("..", "..", "data", "task2")
 bpe_lookup_root = os.path.join(task2_root, "lookup", "bpe")
 data_folder = os.path.join(task2_root, "ready", "bpe")
 src_lookup_prefix = os.path.join(bpe_lookup_root, "src-Business_Ethics-1024")
 # NOTE(review): the target prefix is the same "src-..." prefix as the source.
 # Presumably source and target share one BPE vocabulary -- confirm this is
 # intentional and not a copy-paste slip.
 tgt_lookup_prefix = os.path.join(bpe_lookup_root, "src-Business_Ethics-1024")
 src_lookup = Lookup(type="bpe")
 tgt_lookup = Lookup(type="bpe")

 # Both lookups are constructed first, then loaded from their prefixes.
 src_lookup.load(src_lookup_prefix)
 tgt_lookup.load(tgt_lookup_prefix)

 # batch_size and the sequence-length bounds are defined outside this fragment.
 train_loader, valid_loader, test_loader = loader(
     data_folder,
     batch_size,
     src_lookup,
     tgt_lookup,
     min_seq_len_X,
     max_seq_len_X,
     min_seq_len_y,
     max_seq_len_y,
     custom_filename_prefix="Business_Ethics_",
 )

 # Report the size of each split's X collection plus both lookup sizes.
 split_sizes = [
     len(train_loader.dataset.X),
     len(valid_loader.dataset.X),
     len(test_loader.dataset.X),
 ]
 report_values = split_sizes + [len(src_lookup), len(tgt_lookup)]
 print("Loading done, train instances {}, dev instances {}, test instances {}, vocab size src/tgt {}/{}\n".format(*report_values))
 # ######################################################################

 # GPU SELECTION ########################################################
 device = select_processing_device(verbose=True)
 # ######################################################################

 # MODEL TRAINING #######################################################

 # presumably the weight applied to a coverage loss term -- its consumer
 # is outside this fragment; confirm where it is used.
 coverage_loss_weight = 0.001
Пример #3
0
    # DATA PREPARATION ######################################################
    print("Loading data ...")

    # Batch size and accepted sequence-length window; the target (y) bounds
    # simply reuse the source (X) bounds.
    batch_size = 256
    min_seq_len_X, max_seq_len_X = 10, 50
    min_seq_len_y, max_seq_len_y = min_seq_len_X, max_seq_len_X

    # Disabled alternatives: dataset-specific loaders and the ro-en data folder.
    #from data.roen.loader import loader
    #data_folder = os.path.join("..", "..", "data", "roen", "ready", "setimes.8K.bpe")
    #from data.fren.loader import loader
    from models.util.loaders.standard import loader

    data_folder = os.path.join("..", "..", "data", "fren", "ready")

    # The standard loader returns the three data loaders followed by the
    # source and target w2i / i2w objects, in this order.
    (
        train_loader,
        valid_loader,
        test_loader,
        src_w2i,
        src_i2w,
        tgt_w2i,
        tgt_i2w,
    ) = loader(
        data_folder,
        batch_size,
        min_seq_len_X,
        max_seq_len_X,
        min_seq_len_y,
        max_seq_len_y,
    )

    # Report the size of each split's X collection plus the i2w sizes,
    # which the message labels as the src/tgt vocab size.
    split_sizes = [
        len(train_loader.dataset.X),
        len(valid_loader.dataset.X),
        len(test_loader.dataset.X),
    ]
    report_values = split_sizes + [len(src_i2w), len(tgt_i2w)]
    print(
        "Loading done, train instances {}, dev instances {}, test instances {}, vocab size src/tgt {}/{}\n"
        .format(*report_values))

    # Disabled debugging aid: truncate the train/validation sets to small slices.
    #train_loader.dataset.X = train_loader.dataset.X[0:800]
    #train_loader.dataset.y = train_loader.dataset.y[0:800]
    #valid_loader.dataset.X = valid_loader.dataset.X[0:100]
    #valid_loader.dataset.y = valid_loader.dataset.y[0:100]
    # ######################################################################

    # GPU SELECTION ########################################################
    if torch.cuda.is_available():