# Demo script: load the 'credit' dataset, run it through the Preprocessor,
# print the data before and after, and scatter-plot the first two columns of
# the transformed training matrix.
import matplotlib.pyplot as plt

from data_manager import DataManager
from prepro import Preprocessor

input_dir = "../sample_data"
# NOTE(review): "resuts" looks like a typo for "results"; left unchanged
# because it is a runtime path and nothing in this script reads it.
output_dir = "../resuts"
basename = 'credit'

# Load data
D = DataManager(basename, input_dir)
print("*** Original data ***")
# print() as a function call works on both Python 2 and Python 3; the former
# `print D` statement is a SyntaxError on Python 3.
print(D)

# Preprocess the data and load it back into D. The preprocessor is fitted on
# the training split only (together with its labels); the validation and test
# splits are transformed with that already-fitted preprocessor.
Prepro = Preprocessor()
D.data['X_train'] = Prepro.fit_transform(D.data['X_train'], D.data['Y_train'])
D.data['X_valid'] = Prepro.transform(D.data['X_valid'])
D.data['X_test'] = Prepro.transform(D.data['X_test'])

# Show something that proves that the preprocessing worked fine
print("*** Transformed data ***")
print(D)

# Preprocessing gives you opportunities of visualization:
#   - scatter plots of the first two principal components
#   - scatter plots of the pairs of features that are most relevant
# Here: column 0 against column 1 of the transformed training data, coloured
# by label. The axis labels presume these columns are principal components --
# TODO confirm against Preprocessor.
X = D.data['X_train']
Y = D.data['Y_train']
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.xlabel('PC1')
plt.ylabel('PC2')
# NOTE(review): nothing visible here calls plt.show() or plt.savefig(); unless
# that happens further down, the figure is never displayed or saved.
return parser.parse_args() if __name__ == '__main__': # Get arguments print('Getting arguments...') args = get_args() # make a dataset print('Importing dataset...') data = SentimentDataset(data=args.test_path) # preprocess and save word encodings preprocessor = Preprocessor(max_vocab=args.max_vocab) preprocessor.load() data = preprocessor.transform(dataset=data) # validation split test_ds, _ = data.to_dataset() # to dataLoaders test_set = DataLoader(test_ds, batch_size=16, shuffle=False) # load saved model print('Loading trained model...') model = torch.load(args.model_path) model.eval() test(test_set, model, val=False)