def model_fn(features, labels, mode, params):
    model = GRUModel(params.feature_size, params.gru_num_units,
                     params.attention_size)
    if params.window_size == 1:
        logits = model.build_full_model_without_attention(
            features, params.feature_extract_layers)
    else:
        logits = model.build_full_model_with_window(
            features, params.feature_extract_layers, params.window_size)

    if mode == tf.estimator.ModeKeys.TRAIN:
        labels = tf.one_hot(labels, 2)
        epsilon = 1e-8
        if params.focal_loss:
            loss = tf.reduce_mean(-tf.reduce_sum(
                tf.pow(1 - logits, 2) * labels * tf.log(logits + epsilon),
                reduction_indices=[1]))
        else:
            loss = tf.reduce_mean(-tf.reduce_sum(
                labels * tf.log(logits + epsilon), reduction_indices=[1]))

        def learning_rate_decay_fn(learning_rate, global_step):
            return tf.train.exponential_decay(learning_rate,
                                              global_step,
                                              decay_steps=2000,
                                              decay_rate=0.9)

        tv = tf.trainable_variables()
        regularization_cost = 5e-4 * tf.reduce_sum(
            [tf.nn.l2_loss(v) for v in tv])
        tf.summary.scalar("regularization_cost", regularization_cost)
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss + regularization_cost,
            global_step=tf.train.get_global_step(),
            learning_rate=params.learning_rate,
            optimizer=optimizer,
            learning_rate_decay_fn=learning_rate_decay_fn,
            clip_gradients=params.clip_gradients)
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'probabilities': logits}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    else:
        raise NotImplementedError()
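# A minimal sketch (not from the original project) of how this model_fn could
# be wired into a TF1 Estimator.  The hyperparameter values and the
# train_input_fn name below are illustrative assumptions only.
import tensorflow as tf

hparams = tf.contrib.training.HParams(
    feature_size=64, gru_num_units=128, attention_size=32,
    feature_extract_layers=[128, 64], window_size=1,
    focal_loss=False, learning_rate=1e-3, clip_gradients=5.0)

estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   model_dir="./ckpt",
                                   params=hparams)
# estimator.train(input_fn=train_input_fn, max_steps=10000)  # hypothetical input_fn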
                    D_p, D_e, D_h, D_a,
                    n_classes=n_classes,
                    listener_state=args.active_listener,
                    context_attention=args.attention,
                    dropout_rec=args.rec_dropout,
                    dropout=args.dropout)
    print('Basic Dialog RNN Model.')
elif args.base_model == 'GRU':
    model = GRUModel(D_m, D_e, D_h, n_classes=n_classes, dropout=args.dropout)
    print('Basic GRU Model.')
elif args.base_model == 'LSTM':
    model = LSTMModel(D_m, D_e, D_h, n_classes=n_classes, dropout=args.dropout)
    print('Basic LSTM Model.')
else:
def __init__(self, env, obs_space, action_space, ignoreLTL, gnn_type, dumb_ac,
             freeze_ltl):
    super().__init__()

    # Decide which components are enabled
    self.use_progression_info = "progress_info" in obs_space
    self.use_text = not ignoreLTL and (gnn_type == "GRU"
                                       or gnn_type == "LSTM") and "text" in obs_space
    self.use_ast = not ignoreLTL and ("GCN" in gnn_type) and "text" in obs_space
    self.gnn_type = gnn_type
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.action_space = action_space
    self.dumb_ac = dumb_ac

    self.freeze_pretrained_params = freeze_ltl
    if self.freeze_pretrained_params:
        print("Freezing the LTL module.")

    self.env_model = getEnvModel(env, obs_space)

    # Define text embedding
    if self.use_progression_info:
        self.text_embedding_size = 32
        self.simple_encoder = nn.Sequential(
            nn.Linear(obs_space["progress_info"], 64),
            nn.Tanh(),
            nn.Linear(64, self.text_embedding_size),
            nn.Tanh()).to(self.device)
        print("Linear encoder Number of parameters:",
              sum(p.numel() for p in self.simple_encoder.parameters()
                  if p.requires_grad))
    elif self.use_text:
        self.word_embedding_size = 32
        self.text_embedding_size = 32
        if self.gnn_type == "GRU":
            self.text_rnn = GRUModel(obs_space["text"],
                                     self.word_embedding_size, 16,
                                     self.text_embedding_size).to(self.device)
        else:
            assert self.gnn_type == "LSTM"
            self.text_rnn = LSTMModel(obs_space["text"],
                                      self.word_embedding_size, 16,
                                      self.text_embedding_size).to(self.device)
        print("RNN Number of parameters:",
              sum(p.numel() for p in self.text_rnn.parameters()
                  if p.requires_grad))
    elif self.use_ast:
        hidden_dim = 32
        self.text_embedding_size = 32
        self.gnn = GNNMaker(self.gnn_type, obs_space["text"],
                            self.text_embedding_size).to(self.device)
        print("GNN Number of parameters:",
              sum(p.numel() for p in self.gnn.parameters()
                  if p.requires_grad))

    # Memory specific code.
    self.image_embedding_size = self.env_model.size()
    self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                  self.semi_memory_size)
    self.embedding_size = self.semi_memory_size
    print("embedding size:", self.embedding_size)
    if self.use_text or self.use_ast or self.use_progression_info:
        self.embedding_size += self.text_embedding_size

    if self.dumb_ac:
        # Define actor's model
        self.actor = PolicyNetwork(self.embedding_size, self.action_space)

        # Define critic's model
        self.critic = nn.Sequential(nn.Linear(self.embedding_size, 1))
    else:
        # Define actor's model
        self.actor = PolicyNetwork(self.embedding_size, self.action_space,
                                   hiddens=[64, 64, 64],
                                   activation=nn.ReLU())

        # Define critic's model
        self.critic = nn.Sequential(nn.Linear(self.embedding_size, 64),
                                    nn.Tanh(),
                                    nn.Linear(64, 64),
                                    nn.Tanh(),
                                    nn.Linear(64, 1))

    # Initialize parameters correctly
    self.apply(init_params)
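# init_params is referenced above but not defined in this fragment.  The helper
# below is only a plausible sketch of such an initializer (in the style common
# to torch-ac based codebases); it is an assumption, not the project's actual
# implementation.
import torch

def init_params(m):
    classname = m.__class__.__name__
    if classname.find("Linear") != -1:
        # Orthogonal-like scaling of linear layers, zero biases.
        m.weight.data.normal_(0, 1)
        m.weight.data *= 1 / torch.sqrt(
            m.weight.data.pow(2).sum(1, keepdim=True))
        if m.bias is not None:
            m.bias.data.fill_(0)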
def train(trainX, trainY, epoch, lr, batchSize, modelPath, lookBack, method):

    lossFilePath = "../model/loss_ResRNN-4.pkl"
    output = open(lossFilePath, 'wb')
    lossList = []

    n = trainX.shape[0]
    print("trainx num is:", n)
    batchNum = n // batchSize - 1
    print("batch num is:", batchNum)

    if method == "RNN":
        net = RNNModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                       cell="RNN")
    if method == "LSTM":
        net = LSTMModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                        cell="LSTM")
    if method == "GRU":
        net = GRUModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1,
                       cell="GRU")
    if method == "ResRNN":
        # net = ResidualRNNModel(inputDim=1, hiddenNum=100, outputDim=1,
        #                        layerNum=1, cell="RNNCell")
        net = ResRNNModel(inputDim=1, hiddenNum=100, outputDim=1, resDepth=-1)
    if method == "attention":
        net = AttentionRNNModel(inputDim=1, hiddenNum=100, outputDim=1,
                                seqLen=lookBack)
    if method == "ANN":
        net = ANNModel(inputDim=lookBack, hiddenNum=100, outputDim=1)
    if method == "new":
        net = DecompositionNetModel(inputDim=lookBack, fchiddenNum=100,
                                    rnnhiddenNum=100, outputDim=1)

    optimizer = optim.RMSprop(net.parameters(), lr=lr, momentum=0.9)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # optimizer = optim.SGD(net.parameters(), lr=0.001)

    t1 = time.time()
    for i in range(epoch):
        trainX, trainY = shuffle(trainX, trainY, random_state=epoch)
        batchStart = 0
        lossSum = 0

        for j in range(batchNum):
            x = trainX[batchStart:batchStart + batchSize, :, :]
            y = trainY[batchStart:batchStart + batchSize]

            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            x, y = Variable(x), Variable(y)

            optimizer.zero_grad()

            if method == "new":
                pred = net.forward(x, batchSize=batchSize)
                # criterion = nn.MSELoss()
                # loss = criterion(pred, y)
                loss = MSE_Loss(pred, y)
            else:
                pred = net.forward(x, batchSize=batchSize)
                criterion = nn.MSELoss()
                loss = criterion(pred, y)

            lossSum += loss.data.numpy()[0]
            if j % 30 == 0 and j != 0:
                print("current loss is:", lossSum / 10)
                lossList.append(lossSum / 10)
                lossSum = 0

            # net.zero_grad()
            loss.backward()
            optimizer.step()
            # scheduler.step(loss)

            batchStart += batchSize

        print("%d epoch is finished!" % i)

    t2 = time.time()
    print("train time:", t2 - t1)
    p.dump(lossList, output, -1)
    torch.save(net, modelPath)
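# GRUModel and the other *Model classes used above are imported from the
# project's own modules.  Purely as an illustration, a GRUModel compatible with
# the calls in train() (the inputDim/hiddenNum/outputDim/layerNum/cell
# arguments and the batchSize keyword in forward) might look like the sketch
# below; the internals are assumptions, not the original implementation.
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    def __init__(self, inputDim, hiddenNum, outputDim, layerNum, cell="GRU"):
        super().__init__()
        self.hiddenNum = hiddenNum
        self.layerNum = layerNum
        self.gru = nn.GRU(input_size=inputDim, hidden_size=hiddenNum,
                          num_layers=layerNum, batch_first=True)
        self.fc = nn.Linear(hiddenNum, outputDim)

    def forward(self, x, batchSize):
        # x: (batchSize, lookBack, inputDim)
        h0 = torch.zeros(self.layerNum, batchSize, self.hiddenNum)
        out, _ = self.gru(x, h0)
        return self.fc(out[:, -1, :])  # prediction from the last time step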
def create_batch(tensor, batch_size):
    return [tensor] * batch_size

batch_zero_parah = create_batch(zero_parah, batch_size)
batch_zero_input = create_batch(zero_input, batch_size)
batch_zero_state = create_batch(zero_state, batch_size)

costs = []
sess = tf.Session()
model = GRUModel(input_c, input_q, input_r, input_w, state, dropout,
                 num_hidden=vec_len)
model.load(sess, save_dir='save', dataset=dataset)

# ==================================================

def encode(v, q):
    prev = batch_zero_state
    for x in q:
        batch_q = create_batch([x], batch_size)  # each word from vq
        for y in v:
            batch_w = create_batch([y], batch_size)  # each word from vc
            prev = sess.run(
                model.prediction, {
                              unk_token='[UNK]',
                              pad_token='[PAD]')

    # Loads dataset.
    train_ds, dev_ds, test_ds = load_dataset("chnsenticorp",
                                             splits=["train", "dev", "test"])

    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(train_ds.label_list)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
trainset = dsets.MNIST(root='./data',
                       train=True,
                       transform=transforms.ToTensor(),
                       download=True)
testset = dsets.MNIST(root='./data',
                      train=False,
                      transform=transforms.ToTensor())

batch_size = 100
trainloader = torch.utils.data.DataLoader(dataset=trainset,
                                          batch_size=batch_size,
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset,
                                         batch_size=batch_size,
                                         shuffle=False)

n_in = 28
n_hidden = 100
n_out = 10
seq_dim = 28
use_gpu = True

# model = LSTMModel(n_in, n_hidden, n_out, batch_size, use_gpu)
model = GRUModel(n_in, n_hidden, n_out, batch_size, use_gpu)
if use_gpu:
    model.cuda()

l2 = 0.0
lr = 0.001
epochs = 10
optim = 'adam'
modeleval = ModelEvaluator(model, epochs, lr, batch_size, l2, use_gpu, optim)
acc_ = modeleval.evaluator(trainloader, testloader, seq_dim, n_in)
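# GRUModel and ModelEvaluator come from this project's own modules.  As an
# illustration only, a GRUModel compatible with the constructor call above
# (each 28x28 MNIST image read as a 28-step sequence of 28-pixel rows) might
# look like the following sketch; it is an assumption, not the original class.
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    def __init__(self, n_in, n_hidden, n_out, batch_size, use_gpu):
        super().__init__()
        self.n_hidden = n_hidden
        self.gru = nn.GRU(n_in, n_hidden, num_layers=1, batch_first=True)
        self.fc = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        # x: (batch, seq_dim, n_in)
        h0 = torch.zeros(1, x.size(0), self.n_hidden, device=x.device)
        out, _ = self.gru(x, h0)
        return self.fc(out[:, -1, :])  # class logits from the last time step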
from utils import graph_spectrogram
from model import GRUModel

Tx = 5511
Ty = 1375
n_freq = 101

if __name__ == "__main__":
    m = GRUModel("./config.yaml")
print("Vocabulary Size: {:d}".format(len(vocab.vocabulary_))) print("Train Question Size: {:d}".format(len(train_data))) # Build Model # ================================================== row_size, rows = len(iq[0]), 1 input_c = tf.placeholder(tf.float32, [None, rows, row_size], name="ic") input_q = tf.placeholder(tf.float32, [None, rows, row_size], name="iq") input_r = tf.placeholder(tf.float32, [None, rows, row_size], name="ir") input_w = tf.placeholder(tf.float32, [None, rows, row_size], name="iw") state = tf.placeholder(tf.float32, [None, row_size], name="state") dropout = tf.placeholder(tf.float32, name="dropout") print("Building Model...") model = GRUModel(input_c, input_q, input_r, input_w, state, dropout, num_hidden=max_len) # Train Model # ================================================== def encode(c_batch, q_batch): def merge(article, question): prev = zero_state for sent in article: prev = sess.run( model.prediction, { input_c: [sent], # 1 x [rows x row_size]
# load dataset
train_set = NameDataset(train=True)
test_set = NameDataset(train=False)
train_loader = DataLoader(dataset=train_set,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=NUM_WORKERS)
test_loader = DataLoader(dataset=test_set,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=NUM_WORKERS)
N_COUNTRY = train_set.getCountriesNum()  # final output dimension (number of classes)

# load model
model = GRUModel(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
# vocabulary size (embedding input dim), hidden size, number of countries (output dim), number of GRU layers
model.to(device)

# construct loss and optimizer
criterion = nn.CrossEntropyLoss(reduction='mean')
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.5)


# define train and test model
def train():
    for (names, countries) in train_loader:
        inputs, seq_lengths, target = make_tensors(names, countries)
        y_pred = model(inputs, seq_lengths)  # forward
        loss = criterion(y_pred, target)
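# GRUModel, make_tensors, and the uppercase constants are defined elsewhere in
# this script.  Purely as an illustration, a character-level classifier
# compatible with the call model(inputs, seq_lengths) might look like the
# sketch below; the layer layout is an assumption, not the original model.
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs, seq_lengths):
        # inputs: (batch, max_seq_len) of padded character indices
        embedded = self.embedding(inputs.t())          # (seq_len, batch, hidden)
        packed = pack_padded_sequence(embedded, seq_lengths.cpu(),
                                      enforce_sorted=False)
        _, hidden = self.gru(packed)
        return self.fc(hidden[-1])                     # logits over countries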
def main():
    # Load vocab.
    vocab = Vocab.from_json(args.vocab_path)
    label_map = {0: 'negative', 1: 'positive'}

    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(label_map)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'gru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    elif network == 'lstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='forward',
                          padding_idx=pad_token_id,
                          pooling_type='max')
    elif network == 'rnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    else:
        raise ValueError(
            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
            % network)

    # Load model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    model.eval()

    inputs = [paddle.static.InputSpec(shape=[None, None], dtype="int64")]
    # Convert to static graph with specific input description
    if args.network in [
            "lstm", "bilstm", "gru", "bigru", "rnn", "birnn", "bilstm_attn"
    ]:
        inputs.append(
            paddle.static.InputSpec(shape=[None], dtype="int64"))  # seq_len
    model = paddle.jit.to_static(model, input_spec=inputs)

    # Save in static graph model.
    paddle.jit.save(model, args.output_path)
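# Once exported, the static-graph model can be reloaded for inference without
# the original Python class.  A minimal sketch, assuming args.output_path was
# "./export/static_graph_params" and a network variant that also takes a
# seq_len input (both assumptions for illustration):
import paddle

loaded = paddle.jit.load("./export/static_graph_params")
loaded.eval()

token_ids = paddle.to_tensor([[12, 45, 7, 0, 0]], dtype="int64")  # toy padded ids
seq_len = paddle.to_tensor([3], dtype="int64")
logits = loaded(token_ids, seq_len)
probs = paddle.nn.functional.softmax(logits, axis=-1)
print(probs.numpy())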