def predict_cnn(config):
    """Predict sentiment for ``config.input`` with a trained CNN and print it.

    Loads the LTokenizer cohesion scores and the torchtext Field vocabulary
    from pickle files, restores model weights from ``config.save_model``,
    then prints the sigmoid probability and a Positive/Negative label.

    Args:
        config: namespace with at least ``input``, ``save_model`` and
            ``filter_sizes`` attributes (plus whatever ``CNN`` consumes).
    """
    # Load tokenizer cohesion scores and the torchtext Field. The context
    # managers close the files; the original leaked both open handles.
    with open('pickles/tokenizer.pickle', 'rb') as pickle_tokenizer:
        cohesion_scores = pickle.load(pickle_tokenizer)
    tokenizer = LTokenizer(scores=cohesion_scores)

    with open('pickles/text.pickle', 'rb') as pickle_vocab:
        text = pickle.load(pickle_vocab)

    model = CNN(config)
    model.load_state_dict(torch.load(config.save_model))
    model.to(device)
    model.eval()

    tokenized = tokenizer.tokenize(config.input)

    # assumes config.filter_sizes is sorted ascending so [-1] is the largest
    # filter size — TODO confirm
    min_len = config.filter_sizes[-1]

    # if user's input sentence is shorter than the largest filter size,
    # add pad tokens so every convolution filter fits the sequence
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))

    indexed = [text.vocab.stoi[token] for token in tokenized]
    length = [len(indexed)]

    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)  # add batch axis: (seq_len,) -> (seq_len, 1)
    length_tensor = torch.LongTensor(length)

    prediction = torch.sigmoid(model(tensor, length_tensor))
    label = torch.round(prediction)

    if label == 1:
        label = 'Positive'
    else:
        label = 'Negative'

    sentiment_percent = prediction.item()
    # NOTE(review): the printed percentage is always P(positive), even when
    # the label is Negative — confirm this is the intended display.
    print(f'[in] >> {config.input}')
    print(f'[out] >> {sentiment_percent*100:.2f} % : {label}')
# NOTE(review): this fragment begins inside an earlier branch of an if/elif
# chain on args.model_type (presumably the CRNN branch) — the opening `if`
# line is outside this view; indentation below is reconstructed.
    # Infer channel count from one training batch; assumes the batch tensor's
    # dim 2 is the channel axis — TODO confirm against the dataloader.
    num_input_channels = next(iter(trainer.train_dataloader))[0].shape[2]
    encoder_params = CNN.generate_params()
    rnn_params = RNN.generate_params()
    model = CRNN(input_len=input_len,
                 output_len=output_len,
                 num_input_channels=num_input_channels,
                 encoder_params=encoder_params,
                 rnn_params=rnn_params)
elif args.model_type == 'mlp':
    # Flatten everything after the batch dimension into one input vector.
    input_len = reduce(lambda x, y: x * y, next(iter(trainer.train_dataloader))[0].shape[1:])
    params = MLP.generate_params()
    # NOTE(review): `params` is passed both as the keyword `params=` and
    # expanded via **params — looks like a duplicate; verify against the
    # signature of MLP.__init__.
    model = MLP(input_len=input_len, output_len=output_len, params=params, **params)
else:
    raise ValueError('"{}" is not a valid model.'.format(args.model_type))

model = model.to(device)

# Train Network
trainer.train(model=model,
              min_epochs=args.min_epochs,
              max_epochs=args.max_epochs,
              patience=args.patience,
              verbose=args.verbose,
              test_subject_id=test_subject_id,
              validation_subject_id=validation_subject_id)
print('\n' + '#' * 50 + '\n')
if __name__ == '__main__':
    # Initialise experiment tracking only outside debug mode, so debug runs
    # stay offline and wandb is never imported.
    if not args.debug_mode:
        import wandb
        wandb.init(project=args.project, name=args.name, tags=args.tags, config=args)

    # Both branches of the original debug/non-debug split built identical
    # datasets, so the construction is hoisted out of the conditional.
    # NOTE(review): train and test use the same track — confirm this is
    # intended rather than a placeholder.
    train_data = dataset.MDB_Dataset('MusicDelta_80sRock')
    test_data = dataset.MDB_Dataset('MusicDelta_80sRock')

    print_args(args)

    # get_model
    if args.model_arc == 'CNN':
        model = CNN(hidden_channel_num=10, output_number=4)
    else:
        # Same exception type as before, now with a diagnostic message.
        raise AssertionError('unsupported model_arc: {}'.format(args.model_arc))
    model = model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    if not args.debug_mode:
        wandb.watch(model)

    trainer = Trainer(model, optimizer, args.device, args.debug_mode,
                      args.test_per_epoch, args.num_epochs, args.weight_path,
                      train_data, test_data)
    trainer.train()