# Experiment script: train a Word2Vec embedding, classify with KNN, and report
# the test result for the current sample size.
#
# NOTE(review): FEATURES, DEVICE, EPOCHS, BATCH_SIZE, N_NEIGHBORS and
# sample_size are not defined in this part of the script; they are expected to
# be set before this point.

# raw words and labels, plus the pre calculated weight matrix
# The handles stay open for the whole pipeline because it is not visible here
# at which point the data provider actually reads them; the `with` block then
# closes all three deterministically, also when a step raises.
with open("../data/unique_equations.json") as file_words, \
        open("../data/unique_labels.json") as file_labels, \
        open("../data/weights_0.json") as file_weights:
    data_provider = DataProviderLight(
        file_words,
        file_labels,
        sample_size=sample_size,
        file_weights=file_weights,
    )

    # embedding data, splitting up into train and test set
    processor = VectorProcessor(data_provider)
    generator = DeterministicGenerator(data_provider, processor)

    stopwatch = StopWatch()

    # training the word2vec net
    word2vec = Word2Vec(data_provider, FEATURES, DEVICE)
    word2vec.train(EPOCHS, BATCH_SIZE)

    # extracting weights and injecting them into the data provider
    # (this replaces whatever weights the provider held before)
    data_provider.weights = torch.tensor(word2vec.get_weights())

    # generate dataset (after the weight injection above, same order as before)
    dataset = generator.generate_dataset()

    # train knn classifier
    classifier = KNNClassifier(data_provider, dataset, DEVICE)
    classifier.n_neighbours = N_NEIGHBORS
    classifier.train()

    # test the classifier; only the test run itself is bracketed by the
    # stopwatch. NOTE(review): what stop() does with the measurement is not
    # visible here.
    test = ClassificationTest(dataset, classifier)
    stopwatch.start()
    result = test.test()
    stopwatch.stop()

# `!s` forces str() conversion so the output is identical to the previous
# str(...) concatenation for any result type.
print(f"{sample_size!s} Samples: {result!s}% Präzision")
from classification.KNNClassification import KNNClassifier
from classification.ClassificationTest import ClassificationTest

# script for corresponding test case
# most test cases should be able to be executed without any further changes,
# if data is available

FEATURES = 100
SAMPLE_SIZE = 10000
EPOCHS = 5
BATCH_SIZE = 32
# value assigned to the classifier's n_neighbours attribute
N_NEIGHBORS = 5

# change device to "cpu" if cuda not available
DEVICE = "cuda"

stopwatch = StopWatch()

# pregenerated embedding and labels
# The handles stay open for the whole pipeline because it is not visible here
# at which point the data provider actually reads them; the `with` block then
# closes all three deterministically, also when a step raises.
with open("../data/unique_equations.json") as file_words, \
        open("../data/unique_labels.json") as file_labels, \
        open("../data/weights_0.json") as file_weights:
    data_provider = DataProviderLight(
        file_words,
        file_labels,
        sample_size=SAMPLE_SIZE,
        file_weights=file_weights,
    )

    processor = VectorProcessor(data_provider)
    generator = DeterministicGenerator(data_provider, processor)

    # train the word2vec net; only the training call is bracketed by the
    # stopwatch. NOTE(review): what stop() does with the measurement is not
    # visible here.
    w2v_epochs = Word2Vec(data_provider, FEATURES, DEVICE)
    stopwatch.start()
    w2v_epochs.train(EPOCHS, BATCH_SIZE)
    stopwatch.stop()

    # replace the provider's weights with the freshly trained ones before the
    # dataset is generated
    data_provider.weights = torch.tensor(w2v_epochs.get_weights())
    dataset = generator.generate_dataset()

    # train the knn classifier on the generated dataset
    classifier = KNNClassifier(data_provider, dataset, DEVICE)
    classifier.n_neighbours = N_NEIGHBORS
    classifier.train()

    test = ClassificationTest(dataset, classifier)
    # `!s` forces str() conversion so the output is identical to the previous
    # str(...) concatenation for any result type.
    print(f"Präzision: {test.test()!s}%")