Example No. 1
def validate(message):
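    # Reset the worker state, unpack the uploaded model archive, evaluate every
    # checkpoint on the validation split and publish the compressed result.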
    K.clear_session()
    global status
    global data
    global task
    global progress
    data = None
    status = WorkerStatus.WORKING
    task = TaskType.EVALUATE
    progress = 0
    try:
        model_path = "{}/{}".format(work_dir, message.taskName)
        try:
            shutil.rmtree(model_path)
        except FileNotFoundError:
            pass
        pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)
        io_input = BytesIO(message.data)
        with tarfile.open(fileobj=io_input, mode="r:gz") as tar:
            tar.extractall(work_dir)
        model = Seq2Seq(load=True, working_dir=model_path)
        model.save_full_report()
        progress = 1
        model.evaluate_checkpoints(progress=lambda p: set_progress(p + 1),
                                   data_type=['validate'])

        compress_and_set_result(model_path)
    except Exception:
        status = WorkerStatus.ERROR
        task = TaskType.NONE
        progress = 0
        traceback.print_exc()
Example No. 2
def main():
    # Load data
    print('Loading data...')
    train_iter, val_iter, test_iter, DE, EN = load_dataset(batch_size=32)
    PAD_IDX = EN.vocab.stoi[EN.pad_token]

    # Build model and optimizer
    print('Building model and optimizer...')
    s2s = Seq2Seq(enc_vocab_size=len(DE.vocab),
                  dec_vocab_size=len(EN.vocab),
                  enc_embed_dim=1500,
                  dec_embed_dim=1500,
                  hidden_size=1500,
                  enc_num_layers=2,
                  dec_num_layers=2,
                  padding_idx=PAD_IDX,
                  dropout_rate=0.2)
    if USE_CUDA:
        s2s = s2s.cuda()
    optimizer = optim.SGD(params=s2s.parameters(), lr=1)
    # optimizer = optim.Adam(params=s2s.parameters(), lr=10**-2)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.3, threshold=10**-3, patience=0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[7, 14],
                                                     gamma=0.5,
                                                     last_epoch=-1)
    loss_func = nn.NLLLoss(reduction='sum', ignore_index=PAD_IDX)

    # Train model
    print('Training...')
    print('-' * 102)
    N_EPOCHS = 20
    for i in range(N_EPOCHS):
        # Train a single epoch
        lr = optimizer.param_groups[0]['lr']
        start = time.time()
        train_loss = train(s2s,
                           train_iter,
                           loss_func,
                           optimizer,
                           pad_idx=PAD_IDX,
                           max_grad_norm=5)
        end = time.time()
        train_loss = train_loss[0]
        train_time = (end - start)

        # Calculate loss and ppl on validation set, adjust learning rate if needed
        val_loss = eval(s2s, val_iter, loss_func, PAD_IDX)
        val_loss = val_loss[0]
        ppl = np.exp(val_loss)
        scheduler.step()

        # Save model
        torch.save(s2s, 's2s_model.pt')

        # Print epoch update
        print(
            '| Epoch #{:2d} | train loss {:5.4f} | train time {:5.2f} | val loss {:5.4f} | val ppl {:7.4f} | lr {:4f}'
            .format(i, train_loss, train_time, val_loss, ppl, lr))
        print('-' * 102)
Example No. 3
def train(model_def_path, epochs=300, batch_size=64):
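    # Load the model definition, train a Seq2Seq model on the loaded records and
    # save it under a timestamped working directory.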
    model_def = load_model(model_def_path)
    records = load_records()
    model = Seq2Seq(model_def,
                    working_dir=time.strftime(
                        "./models/{}-%Y_%m_%d-%H%M%S".format(
                            model_def['name'])))
    model.save_no_train()
    model.train(data=records, epochs=epochs, batch_size=batch_size)
    model.save()
    return model
Example No. 4
def validate_model(path):
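    # Reload a trained model and report accuracy on the training split, the
    # validation split and both combined.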
    with open("{}/result_train.csv".format(path), "r") as ftrain:
        train = process_csv(ftrain.readlines())
    with open("{}/result_validate.csv".format(path), "r") as fvalidate:
        validate = process_csv(fvalidate.readlines())
    full = [train[0] + validate[0], train[1] + validate[1]]

    model = Seq2Seq(load=True, working_dir=path)
    # model.save_for_inference_tf()

    train_acc = model.validate(train[0], train[1])
    model._acc_report(train_acc, len(train[0]))

    validate_acc = model.validate(validate[0], validate[1])
    model._acc_report(validate_acc, len(validate[0]))

    full_acc = model.validate(full[0], full[1])
    model._acc_report(full_acc, len(full[0]))

    return model
Example No. 5
def train(message):
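    # Reset the worker state, train a model from the task's JSON definition
    # (progress is bumped once per epoch), save all artifacts and publish the
    # compressed result.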
    K.clear_session()
    global status
    global data
    global task
    global progress
    data = None
    status = WorkerStatus.WORKING
    task = TaskType.TRAIN
    progress = 0
    try:
        model_def = json.loads(message.modelDefinition)
        records = load_records()
        model_path = "{}/{}".format(work_dir, message.taskName)
        try:
            shutil.rmtree(model_path)
        except FileNotFoundError:
            pass
        model = Seq2Seq(model_def, working_dir=model_path)
        model.train(
            records,
            epochs=model_def["epochs"],
            batch_size=model_def["batch_size"],
            callbacks=[
                LambdaCallback(
                    on_epoch_end=lambda epoch, logs: set_progress(epoch + 1))
            ])
        model.save_model()
        model.save_model_def()
        model.save_no_train()
        model.save_history()
        model.save_for_inference_tf()

        compress_and_set_result(model_path)
    except Exception:
        status = WorkerStatus.ERROR
        task = TaskType.NONE
        progress = 0
        traceback.print_exc()
Example No. 6
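        # Greedy decoding: append the most probable token at each step and stop
        # at EOS or after m_l steps.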
        for _ in range(m_l):
            next_tok = torch.argmax(log_prob)
            translation = translation + [next_tok.item()]
            if next_tok == eosindx:
                break
            log_prob, hidden = model.decode(hidden, next_tok.item())
        translations[i] = translation
    return translations


if __name__ == "__main__":
    from Seq2Seq import Seq2Seq
    sentences = [
        "I am an apple", "There is a dog", "look a tree",
        'here is a really long sentence just for verifying'
    ]
    words = []
    for s in sentences:
        for w in s.split():
            words.append(w)
    words = set(words)
    word2idx = {w: i for i, w in enumerate(list(words))}
    x_words = word2idx
    seq2seq = Seq2Seq(word2idx.copy(), word2idx.copy())
    translation = SimpleGreedyDecodingTranslation(seq2seq, sentences)
    #translation = GreedyDecodingTranslation(seq2seq, sentences)
    print(translation)
    print(len(translation))
    #translation = BeamDecodingTranslation(seq2seq, sentences, 2)
    print(len(translation))
Example No. 7
        './model_def_phase1_topology/S2S-T4-GRU-Nadam.json',
        './model_def_phase1_topology/S2S-T4-LSTM-Nadam.json',
        './model_def_phase1_topology/S2S-T5-GRU-Nadam-ReLU.json',
        './model_def_phase1_topology/S2S-T5-LSTM-Nadam-ReLU.json',
        './model_def_phase1_topology/S2S-T6-GRU-Nadam-ReLU.json',
        './model_def_phase1_topology/S2S-T6-LSTM-Nadam-ReLU.json',
        './model_def_phase2_activation/S2S-T5-GRU-Nadam-LReLU.json',
        './model_def_phase2_activation/S2S-T5-GRU-Nadam-Sigmoid.json',
        './model_def_phase2_activation/S2S-T5-GRU-Nadam-Tanh.json',
        './model_def_phase2_activation/S2S-T5-GRU-Nadam-Elu.json',
        './model_def_phase2_activation/S2S-T5-LSTM-Nadam-LReLU.json',
        './model_def_phase2_activation/S2S-T5-LSTM-Nadam-Sigmoid.json',
        './model_def_phase2_activation/S2S-T5-LSTM-Nadam-Tanh.json',
        './model_def_phase2_activation/S2S-T5-LSTM-Nadam-Elu.json',
        './model_def_phase2_activation/S2S-T6-GRU-Nadam-LReLU.json',
        './model_def_phase2_activation/S2S-T6-GRU-Nadam-Sigmoid.json',
        './model_def_phase2_activation/S2S-T6-GRU-Nadam-Tanh.json',
        './model_def_phase2_activation/S2S-T6-GRU-Nadam-Elu.json',
        './model_def_phase2_activation/S2S-T6-LSTM-Nadam-LReLU.json',
        './model_def_phase2_activation/S2S-T6-LSTM-Nadam-Sigmoid.json',
        './model_def_phase2_activation/S2S-T6-LSTM-Nadam-Tanh.json',
        './model_def_phase2_activation/S2S-T6-LSTM-Nadam-Elu.json'
    ]

    # Train each model definition twice; reload it from disk between runs to
    # check that the saved model restores cleanly.
    for model in models:
        for _ in range(2):
            mdl = train(model, 300, 64)
            K.clear_session()
            Seq2Seq(load=True, working_dir=mdl.working_dir)
            K.clear_session()
Example No. 8
    def __init__(self, config):
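        # Build datasets, charset, learning-rate settings, the attention Seq2Seq
        # model, optimizer and checkpoint restore from a single config dict.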
        self.restore_ckpt = None
        if 'restore_ckpt' in config:
            self.restore_ckpt = config['restore_ckpt']

        self.eval_interval = 1000
        self.save_interval = 1000
        self.show_interval = 100
        self.train_epoch = 100
        self.max_iter = 1000000

        #train dataset parse
        self.train_dataset_type = config['train_dataset_type']
        self.train_dataset = None
        if self.train_dataset_type == 'tfrecord':
            self.train_dataset = RecordDataset
        elif self.train_dataset_type == 'filelist':
            self.train_dataset = FileDataset

        train_ds_config = {}
        train_ds_config['norm_h'] = int(config['norm_h'])
        train_ds_config['expand_rate'] = float(config['expand_rate'])
        train_ds_config['file_list'] = config['train_file_list']
        train_ds_config['num_parallel'] = config['num_parallel']
        train_ds_config['batch_size'] = config['batch_size']
        train_ds_config['char_dict'] = config['char_dict']
        train_ds_config['model_type'] = config['model_type']
        train_ds_config['mode'] = 'train'

        self.train_dataset = self.train_dataset(train_ds_config).data_reader(
            self.train_epoch)

        #eval dataset parse
        self.eval_dataset_type = config['eval_dataset_type']
        self.eval_dataset = None
        if self.eval_dataset_type == 'tfrecord':
            self.eval_dataset = RecordDataset
        elif self.eval_dataset_type == 'filelist':
            self.eval_dataset = FileDataset
        eval_ds_config = {}

        eval_ds_config['norm_h'] = int(config['norm_h'])
        eval_ds_config['expand_rate'] = float(config['expand_rate'])
        eval_ds_config['file_list'] = config['eval_file_list']
        eval_ds_config['num_parallel'] = config['num_parallel']
        eval_ds_config['batch_size'] = config['batch_size']
        eval_ds_config['char_dict'] = config['char_dict']
        eval_ds_config['model_type'] = config['model_type']
        eval_ds_config['mode'] = 'test'
        self.eval_dataset = self.eval_dataset(eval_ds_config).data_reader()

        #charset parse
        self.char_dict = config['char_dict']
        self.charset = Charset(self.char_dict)
        self.step_counter = tf.Variable(tf.constant(0),
                                        trainable=False,
                                        name='step_counter')

        self.decoder_dict = {}
        self.loss_value = 0.0

        self.learning_rate = 0.01
        if 'learning_rate' in config:
            self.learning_rate = float(config['learning_rate'])

        self.learning_rate_decay = None
        if 'learning_rate_decay' in config:
            self.learning_rate_decay = config['learning_rate_decay']
        '''
        if self.learning_rate_decay == 'piecewise_constant':
            boundaries = [30000, 60000]
            values = [self.learning_rate, self.learning_rate*0.5, self.learning_rate*0.2]
            self.learning_rate = tf.train.piecewise_constant(self.step_counter, boundaries, values)
        elif self.learning_rate_decay == 'exponential_decay':
            decay_rate = 0.8
            decay_steps = 40000
            self.learning_rate = tf.train.exponential_decay(self.learning_rate,
                                                            self.step_counter,
                                                            decay_steps,
                                                            decay_rate)
        elif self.learning_rate_decay == 'linear_cosine_decay':
            decay_steps = 30000
            self.learning_rate = tf.train.linear_cosine_decay(self.learning_rate,
                                                              self.step_counter,
                                                              decay_steps)
        '''

        if 'eval_interval' in config:
            self.eval_interval = int(config['eval_interval'])
        if 'save_interval' in config:
            self.save_interval = int(config['save_interval'])
        if 'train_epoch' in config:
            self.train_epoch = int(config['train_epoch'])
        if 'max_iter' in config:
            self.max_iter = int(config['max_iter'])

        self.max_padding = 20
        self.eos_id = self.charset.get_eosid()
        self.sos_id = self.charset.get_sosid()
        self.vocab_size = self.charset.get_size()
        self.embedding_dim = 96
        self.enc_units = 128
        self.dec_units = 128
        self.seq2seq = Seq2Seq(vocab_size=self.vocab_size,
                               embedding_dim=self.embedding_dim,
                               SOS_ID=self.sos_id,
                               EOS_ID=self.eos_id,
                               dec_units=self.dec_units,
                               enc_units=self.enc_units,
                               attention_name='luong',
                               attention_type=0,
                               rnn_type='gru',
                               max_length=self.max_padding)

        self.optimizer_type = 'sgd'
        if 'optimizer_type' in config:
            self.optimizer_type = config['optimizer_type']
        if self.optimizer_type not in ('sgd', 'momentum', 'adam'):
            print(
                "Solover Error: optimizer_type {} not in [sgd, momentum, adam]"
                .format(self.optimizer_type))

        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)
        if self.optimizer_type == 'sgd':
            self.optimizer = tf.keras.optimizers.SGD(self.learning_rate)
        elif self.optimizer_type == 'momentum':
            self.optimizer = tf.keras.optimizers.SGD(self.learning_rate, 0.95)
        elif self.optimizer_type == 'adam':
            self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)

        self.checkpoint_dir = 'training_checkpoints'
        if 'checkpoint_dir' in config:
            self.checkpoint_dir = config['checkpoint_dir']
        if not os.path.isdir(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
        self.checkpoint_prefix = os.path.join(self.checkpoint_dir, "ckpt")
        self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer,
                                              model=self.seq2seq)

        if self.restore_ckpt is not None and os.path.isdir(self.restore_ckpt):
            if tf.train.latest_checkpoint(self.restore_ckpt) is not None:
                self.checkpoint.restore(
                    tf.train.latest_checkpoint(self.restore_ckpt))
        else:
            if tf.train.latest_checkpoint(self.checkpoint_dir) is not None:
                self.checkpoint.restore(
                    tf.train.latest_checkpoint(self.checkpoint_dir))
Example No. 9
from Seq2Seq import Seq2Seq,data_generator
import numpy as np

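# Train on houses 2-6 and test on house 1 for the selected appliance,
# using windows of 600 samples.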
train_houses = [2,3,4,5,6]
test_house = 1
#appliance = 'washer_dryer' 
appliance = 'refrigerator' 
windows_length = 600
epochs = 15

#Type = 'CNN-RNN'
#Type = 'ConvLSTM'
#Type = 'Dense'
#Type = 'CNN-2d'   # only for mode 'od'
#Type = 'CNN-1d-2'
Type = 'CNN(Chaoyun)'

seq2seq = Seq2Seq('o')
seq2seq.get_houses_data(train_houses,test_house,
                        appliance,windows_length,2000,200)
seq2seq.build_network(Type)
#seq2seq.load_model_and_history(Type)

seq2seq.train(epochs=epochs)
seq2seq.save_model_and_history()
seq2seq.plot_training_history()
seq2seq._demo_show()

Example No. 10
    def __init__(self, config):
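        # Build CTC/attention datasets, charset, learning-rate decay, the
        # transformer-style Seq2Seq model, optimizer and checkpoint restore
        # (TF1-style APIs) from a single config dict.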
        self.restore_ckpt = None
        if 'restore_ckpt' in config:
            self.restore_ckpt = config['restore_ckpt']

        self.eval_interval = 1000
        self.save_interval = 10
        self.show_interval = 100
        self.train_epoch = 100
        self.max_iter = 1000000

        #train dataset parse
        self.train_dataset_type = config['train_dataset_type']
        self.train_dataset = None
        if self.train_dataset_type == 'tfrecord':
            self.train_dataset = RecordDataset
        elif self.train_dataset_type == 'filelist':
            self.train_dataset = FileDataset

        train_ds_config = {}
        train_ds_config['norm_h'] = int(config['norm_h'])
        train_ds_config['expand_rate'] = float(config['expand_rate'])
        train_ds_config['file_list'] = config['train_file_list']
        train_ds_config['num_parallel'] = config['num_parallel']
        train_ds_config['batch_size'] = config['batch_size']
        train_ds_config['char_dict'] = config['char_dict']
        train_ds_config['model_type'] = config['model_type']
        train_ds_config['mode'] = 'train'

        self.train_dataset = self.train_dataset(train_ds_config).data_reader_ctc_attention(self.train_epoch)

        #eval dataset parse
        self.eval_dataset_type = config['eval_dataset_type']
        self.eval_dataset = None
        if self.eval_dataset_type == 'tfrecord':
            self.eval_dataset = RecordDataset
        elif self.eval_dataset_type == 'filelist':
            self.eval_dataset = FileDataset
        eval_ds_config = {}

        eval_ds_config['norm_h'] = int(config['norm_h'])
        eval_ds_config['expand_rate'] = float(config['expand_rate'])
        eval_ds_config['file_list'] = config['eval_file_list']
        eval_ds_config['num_parallel'] = config['num_parallel']
        eval_ds_config['batch_size'] = config['batch_size']
        eval_ds_config['char_dict'] = config['char_dict']
        eval_ds_config['model_type'] = config['model_type']
        eval_ds_config['mode'] = 'test'
        self.eval_dataset = self.eval_dataset(eval_ds_config).data_reader_ctc_attention()
        
        #charset parse
        self.char_dict = config['char_dict']
        self.model_type = config['model_type']
        self.charset = Charset(self.char_dict, self.model_type)
        self.step_counter = tf.train.get_or_create_global_step()

        self.decoder_dict = {}
        self.loss_value = 0.0

        self.learning_rate = 0.01
        if 'learning_rate' in config:
            self.learning_rate = float(config['learning_rate'])

        self.learning_rate_decay = None
        if 'learning_rate_decay' in config:
            self.learning_rate_decay = config['learning_rate_decay']

        if self.learning_rate_decay == 'piecewise_constant':
            boundaries = [30000, 60000]
            values = [self.learning_rate, self.learning_rate*0.5, self.learning_rate*0.2]
            self.learning_rate = tf.train.piecewise_constant(self.step_counter, boundaries, values)
        elif self.learning_rate_decay == 'exponential_decay':
            decay_rate = 0.8
            decay_steps = 40000
            self.learning_rate = tf.train.exponential_decay(self.learning_rate,
                                                            self.step_counter,
                                                            decay_steps,
                                                            decay_rate)
        elif self.learning_rate_decay == 'linear_cosine_decay':
            decay_steps = 30000
            self.learning_rate = tf.train.linear_cosine_decay(self.learning_rate,
                                                              self.step_counter,
                                                              decay_steps)

        if 'eval_interval' in config:
            self.eval_interval = int(config['eval_interval'])
        if 'save_interval' in config:
            self.save_interval = int(config['save_interval'])
        if 'train_epoch' in config:
            self.train_epoch = int(config['train_epoch'])
        if 'max_iter' in config:
            self.max_iter = int(config['max_iter'])

        self.max_dec_length = 20
        self.max_enc_length = 1200
        self.eos_id = self.charset.get_eosid()
        self.sos_id = self.charset.get_sosid()
        self.vocab_size = self.charset.get_size()
        self.enc_used_rnn = False
        self.enc_num_layers = 0
        self.dec_num_layers = 1
        self.d_model = 512
        self.enc_num_heads = 4
        self.dec_num_heads = 4
        self.enc_dff = 1024
        self.dec_dff = 1024
        self.enc_rate = 0.0
        self.dec_rate = 0.0

        self.seq2seq = Seq2Seq(enc_num_layers=self.enc_num_layers,
                               dec_num_layers=self.dec_num_layers,
                               d_model=self.d_model,
                               vocab_size=self.vocab_size,
                               enc_num_heads=self.enc_num_heads,
                               dec_num_heads=self.dec_num_heads,
                               enc_dff=self.enc_dff,
                               dec_dff=self.dec_dff,
                               enc_used_rnn=self.enc_used_rnn,
                               sos_id=self.sos_id,
                               eos_id=self.eos_id,
                               max_enc_length=self.max_enc_length,
                               max_dec_length=self.max_dec_length,
                               enc_rate=self.enc_rate,
                               dec_rate=self.dec_rate)

        self.optimizer_type = 'adam'
        if 'optimizer_type' in config:
            self.optimizer_type = config['optimizer_type']
        if self.optimizer_type not in ('sgd', 'momentum', 'adam'):
            print("Solover Error: optimizer_type {} not in [sgd, momentum, adam]".format(self.optimizer_type))

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        if self.optimizer_type == 'sgd':
            self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        elif self.optimizer_type == 'momentum':
            self.optimizer = tf.train.MomentumOptimizer(self.learning_rate, 0.95)
        elif self.optimizer_type == 'adam':
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate)

        self.checkpoint_dir = 'training_checkpoints'
        if 'checkpoint_dir' in config:
            self.checkpoint_dir = config['checkpoint_dir']
        if not os.path.isdir(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
        self.checkpoint_prefix = os.path.join(self.checkpoint_dir, "ckpt")
        self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer,
                                              seq2seq=self.seq2seq,
                                              global_step=self.step_counter)

        if self.restore_ckpt is not None and os.path.isdir(self.restore_ckpt):
            if tf.train.latest_checkpoint(self.restore_ckpt) is not None:
                self.checkpoint.restore(tf.train.latest_checkpoint(self.restore_ckpt))
        else:
            if tf.train.latest_checkpoint(self.checkpoint_dir) is not None:
                self.checkpoint.restore(tf.train.latest_checkpoint(self.checkpoint_dir))
Example No. 11
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)



torch.backends.cudnn.enabled = False
print("Input dim: ", n_vocab)


################################# DEFINE MODEL ################################
model = Seq2Seq(n_vocab = n_vocab, 
                n_speaker = n_speaker,
                n_tags = n_tags,
                n_embed_text = 128, 
                n_embed_speaker = 64, 
                n_embed_tags = 128, 
                n_embed_dec = 2, 
                n_hidden_enc = 128, 
                n_hidden_dec = 128, 
                n_layers = 1, 
                n_output = 2, 
                dropout = 0.5)

model = model.cuda()
print(f'The model has {count_parameters(model):,} trainable parameters')
'''
# set single/multi gpu usage
if not args.gpu is None:  
    torch.cuda.manual_seed(0)
    
    if len(args.gpu) == 1:
        torch.cuda.set_device(int(args.gpu))
Example No. 12
def main():
    # TODO: Add a function to parse arguments.

    # Global setting here.
    mode_list = ["train", "visualization", "analysis"]
    mode = mode_list[0]
    task = "control_length"
    # Each model has its own file; the user can add information (epochs, random seed, task) to the file name.
    model_path = "../saved_model/control_length_units=4"

    if mode == "train" or mode == "analysis":
        units = 4

    if mode == 'train':
        # Train a Seq2Seq model.
        # Define other variables only used in train mode.
        epochs = 10
        log_file = "training_control_length_units=4.txt"

        seq2seq = Seq2Seq(mode, task, model_path, log_file, units, epochs)
        seq2seq.seq2seq_model, seq2seq.encoder_model, seq2seq.decoder_model = build_model.seq2seq(
            seq2seq.src_max_len,
            seq2seq.tgt_max_len,
            seq2seq.src_token_size,
            seq2seq.tgt_token_size,
            latent_dim=seq2seq.units)
        seq2seq.train()
        seq2seq.check_accuracy(check_list=["word", "length"])  # Check accuracy after training.
        print("data =", seq2seq.data)
        print("units =", seq2seq.units)
        print("=" * 50)

    if mode == "analysis":
        # Evaluate a trained Seq2Seq and get trained weights and hidden values from the model.
        seq2seq = Seq2Seq(mode, task, model_path, units=units)
        seq2seq.seq2seq_model, seq2seq.encoder_model, seq2seq.decoder_model = build_model.seq2seq(
            seq2seq.src_max_len,
            seq2seq.tgt_max_len,
            seq2seq.src_token_size,
            seq2seq.tgt_token_size,
            latent_dim=seq2seq.units)
        seq2seq.load_seq2seq(model_path)
        # seq2seq.check_accuracy(check_list=["word", "length"])

        # Get the trained weights and save them.
        weights = analysis.get_gru_weights(seq2seq)
        with open(os.path.join(model_path, 'gate_values'),
                  'rb') as filehandler:
            gate_values = pickle.load(filehandler)
        analysis_weight.main(weights, gate_values)
        return

        # Get hidden state by their labels and save them.
        # TODO: Parse N (sample number of each label and each time step) into this function.
        sample = analysis.get_sample(task)

        # Hidden values in the last fully connected layer.
        dense_values = analysis.get_dense_values(seq2seq, sample)
        with open(os.path.join(model_path, 'dense_values'),
                  'wb') as filehandler:
            print('(Main) dense_values =',
                  dense_values.shape)  # Shape = (10, 12, 100, 8)
            pickle.dump(dense_values, filehandler)

        # Hidden values in the decoder GRU layer.
        hidden_state = analysis.get_hidden_state(
            seq2seq, sample)  # Shape = (10, 100, 12, 16)
        hidden_state = hidden_state.transpose([0, 2, 1, 3])
        with open(os.path.join(model_path, 'hidden_state'),
                  'wb') as filehandler:
            print('(Main) hidden_state =',
                  hidden_state.shape)  # Shape = (10, 12, 100, 16)
            pickle.dump(hidden_state, filehandler)

        # Gate values in the decoder GRU layer.
        gate_values = analysis.get_gate_values(seq2seq, sample)
        with open(os.path.join(model_path, 'gate_values'),
                  'wb') as filehandler:
            print('(Main) gate_values[z] =',
                  gate_values['z'].shape)  # Shape = (10, 12, 100, 16)
            pickle.dump(gate_values, filehandler)

        # pdb.set_trace()
        # analysis.dim_reduction_plot(hidden_state, sample)

    if mode == 'visualization':
        # Load the hidden state and visualize it.
        # TODO: Move this part to visualization.main().
        # Load values.
        with open(os.path.join(model_path, 'gate_values'),
                  'rb') as filehandler:
            gate_values = pickle.load(filehandler)
        if task == "control_length":
            title = ['Length=' + str(i) for i in range(1, 11)]
            for gate in ['h', 'z', 'r', 'hh']:
                visualization.plot_xy(gate_values[gate],
                                      title,
                                      name='%s' % gate)
                # return

            for gate in ['h', 'z', 'r', 'hh']:
                visualization.scatter_gate_values(gate_values[gate], name=gate)
                break
Example No. 13
    options = None
    run_metadata = None
    # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    # run_metadata = tf.RunMetadata()
    train_timeline_fname = 'timeline_01.json'
    valid_timeline_fname = "timeline_infer_1s"

    if params_common["model_name"][0] == "Transformer":
        model_params = params_models[params_common["model_name"][0]][
            params_common["model_name"][1]]
        model_params.update(vocab_size=max(params_common["source_vocab_size"],
                                           params_common["target_vocab_size"]))
        model = Transformer(params_common, model_params)
    else:
        model_params = params_models[params_common["model_name"][0]]
        model = Seq2Seq(params_common, model_params)
    print(model_params)

    if params_common["mode"] == "train":

        train_iter = BatchIterator(path_train_x, path_train_y, source_vocab2id,
                                   target_vocab2id, params_common["start_id"],
                                   params_common["end_id"],
                                   params_common["unk_id"],
                                   params_common["pad_id"],
                                   params_common["reverse_target"])

        valid_iter = BatchIterator(path_valid_x, path_valid_y, source_vocab2id,
                                   target_vocab2id, params_common["start_id"],
                                   params_common["end_id"],
                                   params_common["unk_id"],
Example No. 14
DEVICE = 'cpu'

# initialize encoder, decoder and seq2seq model classes
enc = Encoder(INPUT_DIM,
              ENC_HID_DIM,
              N_LAYERS,
              ENC_DROPOUT,
              is_bidirectional=False)
attn = CosAtt(enc, DEC_HID_DIM, N_LAYERS)
dec = Decoder(OUTPUT_DIM,
              DEC_HID_DIM,
              N_LAYERS,
              DEC_DROPOUT,
              enc,
              attention=attn)
model = Seq2Seq(enc, dec, DEVICE, attention=attn)


# initialize values of learnable parameters
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)


model.apply(init_weights)


# count parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
Example No. 15
if __name__ == '__main__':
    source_path = '../data/test/source.txt'
    target_path = '../data/test/target.txt'
    vocab_path = '../data/vocab.txt'
    model_path = '../model/model.pth'
    tokenizer = Tokenizer(vocab_path)
    config = Config()
    fr = open('../result/test.txt', 'w', encoding='utf-8-sig')  # file for the predicted results

    loader = DataLoader(dataset=MyDataSet(source_path, target_path, tokenizer), batch_size=config.batch_size, shuffle=True,
                        num_workers=2, collate_fn=pad, drop_last=False)  # do not drop the last batch
    if not torch.cuda.is_available():
        print('No cuda is available!')
        exit()
    device = torch.device('cuda:0')
    model = Seq2Seq(config)
    model.to(device)
    # load the trained model weights
    checkpoint = torch.load(model_path,map_location=device)
    model.load_state_dict(checkpoint['model'])

    for iter, (batch_x, batch_y, batch_source_lens,batch_target_lens) in enumerate(loader):
        batch_x = batch_x.cuda()
        batch_source_lens = torch.as_tensor(batch_source_lens)
        # predictions and the attention weights at each decoding step
        results = model.BatchSample(batch_x,batch_source_lens)
        for i in range(len(results)):
            words = tokenizer.convert_ids_to_tokens(results[i])
            if i % 100 == 0:
                print(''.join(words))
            fr.write(''.join(words))
Example No. 16
    def __init__(self, config):
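        # Multi-GPU variant: distribute the datasets with a MirroredStrategy and
        # build metrics, model, optimizer and checkpointing from a config dict.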
        self.restore_ckpt = None
        if 'restore_ckpt' in config:
            self.restore_ckpt = config['restore_ckpt']

        self.clip_max_gradient = 3.0
        if 'clip_max_gradient' in config:
            self.clip_max_gradient = float(config['clip_max_gradient'])

        self.clip_min_gradient = -3.0
        if 'clip_min_gradient' in config:
            self.clip_min_gradient = float(config['clip_min_gradient'])

        self.eval_interval = 2000
        self.save_interval = 1000
        self.show_interval = 10
        self.train_epoch = 100
        self.max_iter = 1000000

        self.mirrored_strategy = None
        self.num_replicas_in_sync = 1
        self.batch_per_replica = int(config['batch_size'])
        self.devices = config['devices'].split(',')
        assert len(self.devices) > 1
        self.mirrored_strategy = tf.distribute.MirroredStrategy(devices=self.devices)
        self.num_replicas_in_sync = self.mirrored_strategy.num_replicas_in_sync
        self.global_batch_size = self.batch_per_replica * self.num_replicas_in_sync

        #train dataset parse
        self.train_dataset_type = config['train_dataset_type']
        self.train_dataset = None
        if self.train_dataset_type == 'tfrecord':
            self.train_dataset = RecordDataset
        elif self.train_dataset_type == 'filelist':
            self.train_dataset = FileDataset

        train_ds_config = dict()
        train_ds_config['norm_h'] = int(config['norm_h'])
        train_ds_config['expand_rate'] = float(config['expand_rate'])
        train_ds_config['file_list'] = config['train_file_list']
        train_ds_config['num_parallel'] = config['num_parallel']
        train_ds_config['batch_size'] = self.global_batch_size
        train_ds_config['model_type'] = config['model_type']
        train_ds_config['char_dict'] = config['char_dict']
        train_ds_config['mode'] = 'train'

        self.train_dataset = self.train_dataset(train_ds_config).data_reader(self.train_epoch)

        #eval dataset parse
        self.eval_dataset_type = config['eval_dataset_type']
        self.eval_dataset = None
        if self.eval_dataset_type == 'tfrecord':
            self.eval_dataset = RecordDataset
        elif self.eval_dataset_type == 'filelist':
            self.eval_dataset = FileDataset

        eval_ds_config = dict()

        eval_ds_config['norm_h'] = int(config['norm_h'])
        eval_ds_config['expand_rate'] = float(config['expand_rate'])
        eval_ds_config['file_list'] = config['eval_file_list']
        eval_ds_config['num_parallel'] = config['num_parallel']
        eval_ds_config['batch_size'] = self.global_batch_size
        eval_ds_config['char_dict'] = config['char_dict']
        eval_ds_config['model_type'] = config['model_type']
        eval_ds_config['mode'] = 'test'
        self.eval_dataset = self.eval_dataset(eval_ds_config).data_reader()

        self.train_dataset = self.mirrored_strategy.experimental_distribute_dataset(self.train_dataset)
        self.eval_dataset = self.mirrored_strategy.experimental_distribute_dataset(self.eval_dataset)

        #charset parse
        self.char_dict = config['char_dict']
        self.model_type = config['model_type']
        self.charset = Charset(self.char_dict, self.model_type)
        self.step_counter = 0
        #self.step_counter = tf.Variable(tf.constant(0), trainable=False, name='step_counter')

        self.learning_rate = 0.01
        if 'learning_rate' in config:
            self.learning_rate = float(config['learning_rate'])

        lr_decay_steps = 200000
        lr_decay_rate = 0.96
        if 'lr_decay_steps' in config:
            lr_decay_steps = int(config['lr_decay_steps'])
        if 'lr_decay_rate' in config:
            lr_decay_rate = float(config['lr_decay_rate'])

        self.schedules_type = 'ExponentialDecay'
        if self.schedules_type == 'ExponentialDecay':
            self.learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(self.learning_rate,
                                                                                lr_decay_steps,
                                                                                lr_decay_rate)
        elif self.schedules_type == 'InverseTimeDecay':
            self.learning_rate = tf.keras.optimizers.schedules.InverseTimeDecay(self.learning_rate,
                                                                                lr_decay_steps,
                                                                                lr_decay_rate)

        if 'eval_interval' in config:
            self.eval_interval = int(config['eval_interval'])
        if 'save_interval' in config:
            self.save_interval = int(config['save_interval'])
        if 'train_epoch' in config:
            self.train_epoch = int(config['train_epoch'])
        if 'max_iter' in config:
            self.max_iter = int(config['max_iter'])

        self.optimizer_type = 'adam'
        if 'optimizer_type' in config:
            self.optimizer_type = config['optimizer_type']

        if self.optimizer_type not in ('sgd', 'momentum', 'adam', 'rmsprop', 'adadelte'):
            print("Solover Error: optimizer_type {} not in [sgd, momentum, adam, rmsprop, adadelte]".format(self.optimizer_type))

        self.eos_id = self.charset.get_eosid()
        self.vocab_size = self.charset.get_size()
        with self.mirrored_strategy.scope():
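            # Variables created inside the strategy scope (model, metrics,
            # optimizer) are mirrored across the configured replica devices.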
            self.seq2seq = Seq2Seq(vocab_size=self.vocab_size, eos_id=self.eos_id)
            self.train_loss = tf.keras.metrics.Mean('train_loss')
            self.seq_err_cnt = tf.keras.metrics.Sum('seqerr_count')
            self.seq_all_cnt = tf.keras.metrics.Sum('seqall_count')
            self.char_err_cnt = tf.keras.metrics.Sum('charerr_count')
            self.char_all_cnt = tf.keras.metrics.Sum('charall_count')

            self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)
            if self.optimizer_type == 'sgd':
                self.optimizer = tf.keras.optimizers.SGD(self.learning_rate)
            elif self.optimizer_type == 'momentum':
                self.optimizer = tf.keras.optimizers.SGD(self.learning_rate, 0.95)
            elif self.optimizer_type == 'adam':
                self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)
            elif self.optimizer_type == 'rmsprop':
                self.optimizer = tf.keras.optimizers.RMSprop(self.learning_rate)
            elif self.optimizer_type == 'adadelte':
                self.optimizer = tf.keras.optimizers.Adadelta(self.learning_rate)

            self.checkpoint_dir = 'training_checkpoints'
            if 'checkpoint_dir' in config:
                self.checkpoint_dir = config['checkpoint_dir']
            if not os.path.isdir(self.checkpoint_dir):
                os.makedirs(self.checkpoint_dir)
            self.checkpoint_prefix = os.path.join(self.checkpoint_dir, "ckpt")
            self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self.seq2seq)

            if self.restore_ckpt is not None and os.path.isdir(self.restore_ckpt):
                if tf.train.latest_checkpoint(self.restore_ckpt) is not None:
                    self.checkpoint.restore(tf.train.latest_checkpoint(self.restore_ckpt))
            else:
                if tf.train.latest_checkpoint(self.checkpoint_dir) is not None:
                    self.checkpoint.restore(tf.train.latest_checkpoint(self.checkpoint_dir))
Example No. 17
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=0,
                        collate_fn=pad,
                        drop_last=False)  # do not drop the last batch
    # evaluation set
    eval_loader = DataLoader(dataset=MyDataSet(eval_source_path,
                                               eval_target_path,
                                               eval_keyword_path, tokenizer),
                             batch_size=config.batch_size,
                             shuffle=True,
                             num_workers=0,
                             collate_fn=pad,
                             drop_last=False)  # do not drop the last batch

    model = Seq2Seq(config, bert_model)
    model.to(device)
    checkpoint = torch.load(pre_trainModel)
    model.load_state_dict(checkpoint['model'])
    #optimizer = optim.SGD(model.parameters(),lr=config.learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    optimizer.load_state_dict(checkpoint['optimizer'])
    criterion = nn.CrossEntropyLoss()

    total_loss_criterion = 0
    PAD_ID = 0
    print_every = 20000
    num_training_steps = 600000  # train for at least this many steps
    print_every_loss = []  # average batch loss recorded every print_every steps, used to plot the loss curve
    num_eval = 8  # minimum number of evaluations on the eval set
    Rouge_threshold = 3  # early stopping: stop if the ROUGE gap between the last and previous num_eval evaluations drops below this value (ROUGE on a 0-100 scale)
Example No. 18
#TransformerParameters

emsize = args.transformer_embedding_dim  # embedding dimension (e.g. 200)
nhid = args.transformer_hidden_dim  # dimension of the feedforward network model in nn.TransformerEncoder (e.g. 200)
nlayers = args.transformer_n_layers  # number of nn.TransformerEncoderLayer in nn.TransformerEncoder (e.g. 2)
nhead = args.transformer_n_head  # number of heads in the multi-head attention models (e.g. 2)
dropout = args.transformer_dropout  # dropout value (e.g. 0.2)

# build encoder, decoder, model and optimizer for the selected architecture
if args.model == 'seq2seq':
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS,
                  ENC_DROPOUT).to(device)
    dec = Decoder(DEC_EMB_DIM, OUTPUT_DIM, HID_DIM, N_LAYERS,
                  DEC_DROPOUT).to(device)
    model = Seq2Seq(enc, dec, attn=False).to(device)
    optimizer = optim.Adam(model.parameters())
elif args.model == 'hred':
    enc = RecurrentEncoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS,
                           ENC_DROPOUT).to(device)
    dec = AttnDecoder(DEC_EMB_DIM, OUTPUT_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT,
                      MAX_LENGTH).to(device)
    model = Seq2Seq(enc, dec, attn=True).to(device)
    optimizer = optim.Adam(model.parameters())
elif args.model == 'seq2seq_attn':
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, HRED_N_LAYERS,
                  ENC_DROPOUT).to(device)
    dec = AttnDecoder(DEC_EMB_DIM, OUTPUT_DIM, HID_DIM, HRED_N_LAYERS,
                      DEC_DROPOUT, MAX_LENGTH).to(device)
    model = Seq2Seq(enc, dec, attn=True).to(device)
    optimizer = optim.Adam(model.parameters())
Example No. 19
    return elapsed_mins, elapsed_secs


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


torch.backends.cudnn.enabled = False
print("Input dim: ", n_vocab)

################################# DEFINE MODEL ################################
model = Seq2Seq(n_vocab=n_vocab,
                n_embed_text=128,
                n_embed_dec=2,
                n_hidden_enc=128,
                n_hidden_dec=128,
                n_layers=1,
                n_output=2,
                dropout=0.5)

model = model.cuda()
print(f'The model has {count_parameters(model):,} trainable parameters')
if args.load_from > 0:
    model.load_state_dict(torch.load("_epoch_" + str(args.load_from) + ".pt"))

########################### DEFINE LOSS & OPTIMIZER ###########################
positive_label_weights = torch.tensor([args.pos_weight]).cuda()
#criterion = nn.BCEWithLogitsLoss(pos_weight=positive_label_weights)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),
                       lr=0.001,

# initialization hyperparameters
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 100
DEC_EMB_DIM = 100
ENC_HID_DIM = 2
DEC_HID_DIM = 2
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
device = 'cpu'  # torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# initialize encoder, decoder and model object
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, DEC_HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec, device).to(device)

# initialize encoder, decoder, model objects with attention
attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
enc_a = EncoderWAttention(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, ENC_DROPOUT)
dec_a = DecoderWAttention(OUTPUT_DIM, DEC_EMB_DIM, DEC_HID_DIM, ENC_HID_DIM, DEC_DROPOUT, attn)
model_a = Seq2SeqWAttention(enc_a, dec_a, device).to(device)

# initialize weights
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
model.apply(init_weights)
model_a.apply(init_weights)

# count parameters