def build(self):
    print('building rnn cell...')
    hidden_layer = RNN(self.rng, self.n_input, self.n_hidden, self.n_batch,
                       self.x, self.Er, self.Ec, self.x_mask_r, self.x_mask_c,
                       is_train=self.is_train, p=self.p)
    print('building softmax output layer...')
    [h_r, h_c] = hidden_layer.activation
    output_layer = softmax(self.n_hidden, self.cluster_num,
                           self.in_cluster_num, h_r, h_c)

    # Cross-entropy over both output heads, summed into a single cost
    cost_r = self.categorical_crossentropy(output_layer.activation_r, self.y[:, :, 0])
    cost_c = self.categorical_crossentropy(output_layer.activation_c, self.y[:, :, 1])
    cost = cost_r + cost_c

    self.params = [self.Er, self.Ec]
    self.params += hidden_layer.params
    self.params += output_layer.params

    lr = T.scalar('lr')
    # Clip each gradient to [-10, 10] before handing it to the optimizer
    gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
    updates = self.optimizer(self.params, gparams, lr)

    # Compiled Theano functions: a training step (is_train=1), plus probability,
    # prediction, and cost evaluation (is_train=0)
    self.train = theano.function(
        inputs=[self.x, self.x_mask_r, self.x_mask_c, self.y, self.y_mask,
                self.n_batch, lr],
        outputs=[cost],
        updates=updates,
        givens={self.is_train: np.cast['int32'](1)})
    self.getNLL = theano.function(
        inputs=[self.x, self.x_mask_r, self.x_mask_c, self.n_batch],
        outputs=[output_layer.activation_r, output_layer.activation_c],
        givens={self.is_train: np.cast['int32'](0)})
    self.predict = theano.function(
        inputs=[self.x, self.x_mask_r, self.x_mask_c, self.n_batch],
        outputs=[output_layer.predict_r, output_layer.predict_c],
        givens={self.is_train: np.cast['int32'](0)})
    self.test = theano.function(
        inputs=[self.x, self.x_mask_r, self.x_mask_c, self.y, self.y_mask,
                self.n_batch],
        outputs=cost,
        givens={self.is_train: np.cast['int32'](0)})
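# --- Usage sketch (not part of the original source) ---
# Rough illustration of how the four functions compiled above would be driven;
# `model`, the minibatch variables, and `learning_rate` are placeholder names
# assumed here, and the argument order follows the `inputs=` lists above.
#
# model.build()
# train_cost, = model.train(x, x_mask_r, x_mask_c, y, y_mask, n_batch, learning_rate)
# prob_r, prob_c = model.getNLL(x, x_mask_r, x_mask_c, n_batch)
# pred_r, pred_c = model.predict(x, x_mask_r, x_mask_c, n_batch)
# eval_cost = model.test(x, x_mask_r, x_mask_c, y, y_mask, n_batch)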
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--train_all', action='store_true')
parser.add_argument('--train_mode', type=str, default='X')
parser.add_argument('--region_names_file', type=str,
                    default='/home/ubuntu/baidu/data_processed/region_names.txt')
args = parser.parse_args()

dataset = InfectDataset(args, args.train_mode)
inferece_test = inferece_data_split(dataset, args)
# train_loader = DataLoader(train, batch_size=1, shuffle=False)
# eval_loader = DataLoader(valid, batch_size=1, shuffle=False)
inferece_loader = DataLoader(inferece_test, batch_size=1, shuffle=False)

# Load the trained model weights and run inference only
rnn = RNN(3, 128, args.n_his, 1).cuda()
rnn.load_state_dict(torch.load('feature_best_lstm.pth'))
# optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
# loss_func = nn.MSELoss()
# best_eval_arg_loss = 10

numpy_data_list = []
with torch.no_grad():
    rnn.eval()
    for j, batch in enumerate(inferece_test):
        data = torch.from_numpy(batch[0:5, :, :].reshape(1, 392, 5)).float().cuda()
        # print(data.shape)
        # Roll the model forward one day at a time for 50 days
        for day in range(50):
            # print(data.reshape(5, -1))
            pred = rnn(data)
            pred = pred.reshape(1, -1)
parser.add_argument('--test_num', type=str, default=1)
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--train_all', action='store_true')
parser.add_argument('--train_mode', type=str, default='X')
parser.add_argument('--region_names_file', type=str,
                    default='/home/ubuntu/baidu/data_processed/region_names.txt')
args = parser.parse_args()

dataset = InfectDataset(args, args.train_mode)
train, valid = data_split(dataset, args)
train_loader = DataLoader(train, batch_size=1, shuffle=False)
eval_loader = DataLoader(valid, batch_size=1, shuffle=False)

rnn = RNN(3, 128, args.n_his, 1).cuda()
optimizer = torch.optim.Adam(rnn.parameters(), lr=args.lr)
loss_func = nn.MSELoss()
best_eval_arg_loss = 10

if args.train_mode == 'X':
    for i in range(args.epochs):
        totol_loss = 0
        for j, batch in enumerate(train_loader):
            # Channels 0-4 are the input features; channel 5 is the target
            data = torch.from_numpy(batch[:, 0:5, :, :].reshape(1, 392, 5)).float().cuda()
            label = torch.from_numpy(batch[:, 5, :, :].reshape(1, 392, 1)).float().cuda()
            output = rnn(data)
def randomTrainingExample():
    # Draw a random (category, name) pair and convert both to tensors
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor


# negative log-likelihood loss
criterion = nn.NLLLoss()

learning_rate = 0.001  # If you set this too high, it might explode. If too low, it might not learn.
n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)  # LSTM model
optimizer = optim.Adam(rnn.parameters(), lr=learning_rate)


def train(category_tensor, line_tensor):
    rnn.zero_grad()
    rnn.hidden = rnn.init_hidden()
    # Classify from the output of the last time step
    output = rnn(line_tensor)[-1]
    loss = criterion(output.unsqueeze(0), category_tensor)
    loss.backward()
    optimizer.step()
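# --- Training-loop sketch (not part of the original source) ---
# Shows how randomTrainingExample() and train() above are typically wired
# together; n_iters and the print interval are assumed values for illustration.
#
# n_iters = 100000
# for it in range(1, n_iters + 1):
#     category, line, category_tensor, line_tensor = randomTrainingExample()
#     train(category_tensor, line_tensor)
#     if it % 5000 == 0:
#         print('%d/%d iterations (last example: %s -> %s)' % (it, n_iters, line, category))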
    tokenizer=tokenizer,
    task=TASK,
    label_col=label_col,
    output_size=output_size,
    device=DEVICE)
print("Dataset created")

# Define network, loss function and optimizer
if MODEL_TYPE == "rnn":
    LSTM_PARAMS = dict(batch_first=True,
                       num_layers=args.rnn_layers,
                       dropout=args.dropout,
                       bidirectional=True,
                       hidden_size=args.hidden_size)
    net = RNN(embedding_dim=tokenizer.embedding_matrix.shape[1],
              output_size=output_size,
              rnn_params=LSTM_PARAMS,
              device=DEVICE)
    net.to(DEVICE)
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs")
        net = nn.DataParallel(net)
    if USE_PRETRAIN:
        net = load_model(net, args.pretrained_model, DEVICE)
elif MODEL_TYPE == "rnnsoft":
    net = RNNAtt(
        rnn_layers=args.rnn_layers,
        da_layers=args.da_layers,
        output_size=output_size,
        d_model=args.hidden_size,
        device=DEVICE,
        dropout_rate=args.dropout,
def main():
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=MAX_NUM_WORDS)
    X_train = pad_sequences(X_train, maxlen=MAX_SEQ_LENGTH)
    X_test = pad_sequences(X_test, maxlen=MAX_SEQ_LENGTH)
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    y = to_categorical(y)
    # print('Training samples: %i' % len(X))

    # docs = negative_docs + positive_docs
    # labels = [0 for _ in range(len(negative_docs))] + [1 for _ in range(len(positive_docs))]
    # labels = to_categorical(labels)
    # print('Training samples: %i' % len(docs))

    # tokenizer.fit_on_texts(docs)
    # sequences = tokenizer.texts_to_sequences(docs)
    # word_index = tokenizer.word_index

    # result = [len(x) for x in X]
    # print('Text information:')
    # print('max length: %i / min length: %i / mean length: %i / limit length: %i' % (np.max(result),
    #                                                                                 np.min(result),
    #                                                                                 np.mean(result),
    #                                                                                 MAX_SEQ_LENGTH))
    # print('vocabulary size: %i / limit: %i' % (len(word_index), MAX_NUM_WORDS))

    # Padding all sequences to the same length of `MAX_SEQ_LENGTH`
    # data = pad_sequences(X, maxlen=MAX_SEQ_LENGTH, padding='post')

    histories = []
    for i in range(RUNS):
        print('Running iteration %i/%i' % (i + 1, RUNS))
        random_state = np.random.randint(1000)
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=VAL_SIZE, random_state=random_state)

        model = RNN(num_words=MAX_NUM_WORDS,
                    embedding_dim=EMBEDDING_DIM,
                    lstm_size=LSTM_SIZE,
                    lstm_layers=LSTM_LAYERS,
                    max_seq_length=MAX_SEQ_LENGTH,
                    dropout_rate=DROPOUT_RATE,
                    hidden_units=HIDDEN_UNITS,
                    nb_classes=NB_CLASSES).build_model()

        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        if i == 0:
            print(model.summary())

        history = model.fit(
            X_train, y_train,
            epochs=NB_EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=1,
            validation_data=(X_val, y_val),
            callbacks=[
                # TQDMCallback(),
                ModelCheckpoint('model-lstm-%i.h5' % (i + 1),
                                monitor='val_loss',
                                verbose=1,
                                save_best_only=True,
                                mode='min'),
                # TensorBoard(log_dir='./logs/temp', write_graph=True)
            ])
        print()
        histories.append(history.history)

    with open('history-lstm.pkl', 'wb') as f:
        pickle.dump(histories, f)

    show_results()