import torch
import torch.nn as nn
import torch.optim as optim

# Warn only when a CUDA device is actually available but unused.
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_data = dataloader('train_shuf.txt', args.batch_size, args.bptt)
val_data = dataloader('val.txt', args.batch_size, args.bptt)
eval_batch_size = args.batch_size

###############################################################################
# Build the model
###############################################################################

ntokens = 27  # fixed vocabulary size for this dataset
model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                 args.dropout, args.tied)
optimizer = optim.SGD(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
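
# --- Usage sketch (illustrative only; not part of the original script). It
# shows why repackage_hidden matters in truncated BPTT: the hidden state is
# carried across minibatches but detached so backprop stops at each batch
# boundary. `get_batch`, `model.init_hidden`, and the (seq_len, batch) data
# layout are assumptions here.
def train_one_epoch():
    model.train()
    hidden = model.init_hidden(args.batch_size)
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)
        hidden = repackage_hidden(hidden)  # cut the graph from earlier batches
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        optimizer.step()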
import sys
import torch
import torch.nn as nn

sys.path.append("../d2l_func/")
from data_prepare import load_data_jay_song, data_iter_random, \
    data_iter_consecutive, to_onehot
from model_train import train_rnn_pytorch
from predict import predict_rnn_pytorch
from rnn_model import RNNModel

if __name__ == "__main__":
    # load data
    corpus_index, char_to_idx, vocab_set, vocab_size = load_data_jay_song()

    # model
    hidden_num = 256
    rnn_layer = nn.LSTM(vocab_size, hidden_num)
    model = RNNModel(rnn_layer, vocab_size)
    model = model.cuda()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    params = {
        "epoch_num": 10,
        "model": model,
        "loss": loss,
        "optimizer": optimizer,
        "batch_size": 64,
        "num_step": 32,
        "corpus_index": corpus_index,
        "data_iter": data_iter_consecutive,
        "char_to_idx": char_to_idx,
        "vocab_set": vocab_set,
        "vocab_size": vocab_size,
        "predict_rnn_pytorch": predict_rnn_pytorch,
        "pred_num": 50,
    }
    # Assumed continuation: the keys above mirror train_rnn_pytorch's
    # keyword arguments, so pass them through.
    train_rnn_pytorch(**params)
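
# --- Illustration only: a minimal consecutive-sampling iterator in the spirit
# of data_iter_consecutive (this is NOT the implementation in
# ../d2l_func/data_prepare.py; the real signature may differ). Each yield is a
# contiguous (X, Y) pair where Y is X shifted one step ahead, so hidden state
# can legitimately be carried between successive batches.
def data_iter_consecutive_sketch(corpus_index, batch_size, num_step, device='cuda'):
    corpus = torch.tensor(corpus_index, dtype=torch.long, device=device)
    batch_len = len(corpus) // batch_size
    # One contiguous token stream per batch row.
    corpus = corpus[:batch_size * batch_len].view(batch_size, batch_len)
    for b in range((batch_len - 1) // num_step):
        i = b * num_step
        yield corpus[:, i:i + num_step], corpus[:, i + 1:i + num_step + 1]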
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# BaseClassifier, RNNModel and the word_to_ix vocabulary mapping are provided
# by the project's other modules.


class RNNClassifier(BaseClassifier):

    def __init__(self):
        super().__init__('RNN')
        self.hyper_parameters = {}  # dictionary of the chosen hyper-parameters
        self.model = None
        self.criterion = None
        self.optimizer = None
        self.sequence_length = None

    def get_hyper_parameters_grid(self):
        grid = {
            'lr': [0.001, 0.01, 0.1],
            'epochs': [10, 50, 100],
            'n_neurons_fc': [64, 128, 256],
            'hidden_dim': [64, 128, 256]
        }
        return grid

    def set_hyper_parameters(self, hyper_parameters_dict):
        self.hyper_parameters = hyper_parameters_dict

    def set_best_hyper_parameters(self):
        self.hyper_parameters = {
            'lr': 0.01,
            'epochs': 50,
            'n_neurons_fc': 128,
            'hidden_dim': 64
        }

    def fit(self, X, y):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        y_tensor = self.convert_to_tensor(y, target=True)
        n_neurons_fc = self.hyper_parameters['n_neurons_fc']
        hidden_dim = self.hyper_parameters['hidden_dim']
        self.model = RNNModel(num_features=len(X.columns), num_class=2,
                              hidden_dim=hidden_dim, n_neurons_fc=n_neurons_fc,
                              sequence_length=self.sequence_length)
        self.init_loss_and_optimizer()
        epochs = self.hyper_parameters['epochs']
        n_batches = 20
        for epoch in range(epochs):
            for i in range(n_batches):
                # Local batches and labels
                local_X1, local_X2, local_y = self.get_batch(
                    X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                    n_batches, i)
                self.optimizer.zero_grad()
                y_pred = self.model(local_X1, local_X2)
                loss = self.criterion(y_pred, local_y)
                loss.backward()
                self.optimizer.step()

    def predict(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)
        _, predictions = torch.max(outputs, 1)
        return predictions

    def predict_proba(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)
        # The model emits raw logits (CrossEntropyLoss applies log-softmax
        # internally), so normalize with softmax to obtain probabilities.
        predictions = torch.softmax(outputs, dim=1).detach().numpy()
        return predictions

    def init_loss_and_optimizer(self):
        """ Initializes the loss and optimizer for the current .fit """
        self.criterion = CrossEntropyLoss()
        self.optimizer = Adam(self.model.parameters(),
                              lr=self.hyper_parameters['lr'])

    def convert_to_tensor(self, df, target=False):
        """
        Converts the given DataFrame to a tensor.
        :param df: the DataFrame to convert
        :type df: pd.DataFrame
        :param target: indicates whether we are using the features df (False)
            or the target df (True). Defaults to False
        :type target: bool
        :return: the converted tensor
        :rtype: torch.Tensor
        """
        if target:
            return torch.LongTensor(df.values)
        return torch.FloatTensor(df.values)

    def get_X_tensors(self, X):
        """
        Splits the given X df to tweet text indexes for embedding and other
        extracted features.
        :param X: the df to split
        :type X: pd.DataFrame
        :return: X_tweet_text_tensor, X_other_features_tensor
        :rtype: tuple
        """
        X_tweet_text = X['tweet text']
        X_other_features = X.drop(labels=['tweet text'], axis=1)
        X_tensor_other_features = self.convert_to_tensor(X_other_features,
                                                         target=False)
        # Map each word in every tweet to its vocabulary index.
        indices_list = []
        for words_list in X_tweet_text.values:
            indices_list.append([word_to_ix[w] for w in words_list])
        X_tensor_tweet_text = torch.LongTensor(indices_list)
        return X_tensor_tweet_text, X_tensor_other_features

    def get_batch(self, X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                  n_batches, i):
        """
        Creates the i'th batch from the given data.
        :param X_tweet_text_tensor: data to get batch from
        :type X_tweet_text_tensor: torch.Tensor
        :param X_other_features_tensor: data to get batch from
        :type X_other_features_tensor: torch.Tensor
        :param y_tensor: data to get batch from
        :type y_tensor: torch.Tensor
        :param n_batches: the amount of total batches we need
        :type n_batches: int
        :param i: the current batch we want to take
        :type i: int
        :return: a tuple of the batched data
        :rtype: tuple
        """
        # Derive the batch size from the requested number of batches so the
        # n_batches slices together cover the whole dataset.
        batch_size = y_tensor.shape[0] // n_batches
        start, end = i * batch_size, (i + 1) * batch_size
        X1_batch = X_tweet_text_tensor[start:end]
        X2_batch = X_other_features_tensor[start:end]
        y_batch = y_tensor[start:end]
        return X1_batch, X2_batch, y_batch
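
# --- Usage sketch (illustrative only; X_train / y_train / X_test are
# hypothetical DataFrames: X needs a 'tweet text' column holding token lists
# plus numeric feature columns, y holds integer class labels). ---
clf = RNNClassifier()
clf.set_best_hyper_parameters()
clf.sequence_length = 30  # assumed fixed tweet length after padding
clf.fit(X_train, y_train)
labels = clf.predict(X_test)       # tensor of predicted class indices
probs = clf.predict_proba(X_test)  # ndarray of per-class probabilities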
import torch
import tensorboardX
from collections import defaultdict

X_val, Y_val = tensor_loader.load_X_Y_rnn(logger, args.val_table_name, chunk=0,
                                          total_chunks=total_chunks,
                                          no_gpu=args.no_gpu,
                                          validation_set=True)

# Number of samples, sequence length, number of features.
N, seq_length, D_in = X_train.shape

if args.top100_labels:
    # Dimension of the hidden units, and dimension of the output vector.
    H, D_out = 1000, 100
else:
    H, D_out = 100, 10

model = RNNModel(D_in, H, D_out)
if not args.no_gpu:
    model.cuda()

# Multi-label targets, so use a sigmoid-based loss averaged over elements.
loss_fn = torch.nn.BCEWithLogitsLoss(reduction='mean')
learning_rate, decay, momentum = 0.01, 1e-6, 0.9
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                            weight_decay=decay, momentum=momentum,
                            nesterov=True)

tb_logger_train = tensorboardX.SummaryWriter(
    log_dir='../tensorboard_logs/rnn_train_' + str(experiment_id))
tb_logger_val = tensorboardX.SummaryWriter(
    log_dir='../tensorboard_logs/rnn_val_' + str(experiment_id))

metrics_train = defaultdict(list)
metrics_val = defaultdict(list)
metrics_test = defaultdict(list)

epochs = 3  # TODO move to program args

for chunk in range(total_chunks):
    if chunk > 0:
        # Load next chunk (first chunk will already be loaded).
        X_train, Y_train = tensor_loader.load_X_Y_rnn(
            logger, args.train_table_name, chunk=chunk,
            total_chunks=total_chunks, no_gpu=args.no_gpu)
        X_val, Y_val = tensor_loader.load_X_Y_rnn(
            logger, args.val_table_name, chunk=chunk,
            total_chunks=total_chunks, no_gpu=args.no_gpu,
            validation_set=True)
    for epoch in range(epochs):
        # First of all, train the model using the training set.
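        # --- Sketch of the loop body (illustrative only: the original body is
        # not part of this excerpt; `batch_size`, the logging tags, and the
        # assumption that model(x) returns logits of shape (batch, D_out) are
        # all hypothetical). BCEWithLogitsLoss takes raw logits and float
        # multi-label targets.
        model.train()
        batch_size = 128  # assumed; not defined in the excerpt
        permutation = torch.randperm(N, device=X_train.device)
        for start in range(0, N, batch_size):
            idx = permutation[start:start + batch_size]
            optimizer.zero_grad()
            logits = model(X_train[idx])
            loss = loss_fn(logits, Y_train[idx].float())
            loss.backward()
            optimizer.step()
        tb_logger_train.add_scalar('loss', loss.item(), epoch)

        # Then evaluate on the held-out chunk without tracking gradients.
        model.eval()
        with torch.no_grad():
            val_loss = loss_fn(model(X_val), Y_val.float())
        tb_logger_val.add_scalar('loss', val_loss.item(), epoch)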