def accuracy(config):
    """Evaluate a trained MoodPredictionModel on the mood dataset.

    Loads the CSV at ``config.data_path``, windows it via ``dataloader.process``,
    restores the model weights from ``config.saved_model``, and prints the
    fraction of predictions within ``config.corr_thres`` of the target for the
    train and test splits.
    """
    data = pd.read_csv(config.data_path)
    inputs, targets = dataloader.process(data, window_size=5)
    device = torch.device(config.device)

    # 80/20 chronological split (no shuffling).
    n = len(targets)
    split = round(.8 * n)
    train_inputs, train_targets = inputs[:split].to(device), targets[:split].to(device)
    test_inputs, test_targets = inputs[split:].to(device), targets[split:].to(device)

    print("Initializing LSTM model...")
    model = MoodPredictionModel(config.input_length, config.input_dim,
                                config.num_hidden, config.num_layers).to(device)
    print("Loading model parameters from trained model")
    model.load_state_dict(torch.load(config.saved_model))
    model.eval()
    print(model)

    # Inference only: no_grad avoids building the autograd graph, matching the
    # other evaluation routines in this project.
    with torch.no_grad():
        train_out = model(train_inputs).squeeze()
        test_out = model(test_inputs).squeeze()

    # A prediction counts as correct when it lies within corr_thres of the target.
    correct_train = (abs(train_out - train_targets) < config.corr_thres).sum().item()
    train_accuracy = correct_train / train_out.size(0)
    correct_test = (abs(test_out - test_targets) < config.corr_thres).sum().item()
    test_accuracy = correct_test / test_out.size(0)

    print(f"Accuracy on training set: {train_accuracy*100}%")
    print(f"Accuracy on testing set: {test_accuracy*100}%")
def classify_coqa(args, device):
    """Score the ELMo high/low classifier against labelled CoQA dev questions.

    Runs the classifier on every non-QuAC question whose gold label is
    'overview' or 'conceptual' and reports accuracy separately for
    hand-labelled and rule-labelled questions, plus a combined total.
    """
    args.config.batch_size = 1
    config = args.config
    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)
    with open('data/specificity_qa_dataset/dev.pickle', 'rb') as f:
        data = pickle.load(f)
    with torch.no_grad():
        model.eval()
        correct_hand = 0
        incorrect_hand = 0
        correct_rule = 0
        incorrect_rule = 0
        # Index 0 -> 'overview', index 1 -> 'conceptual' (matches preds.item()).
        class_map = ['overview', 'conceptual']
        for i, instance in enumerate(data):
            if instance['dataset'] == 'quac':
                continue
            if i % 100 == 0:
                print("%d / %d" % (i, len(data)))
            for para in instance['paragraphs']:
                for qa in para['qas']:
                    # BUG FIX: the original had `if qa['high_low_mode'] == 'rules':
                    # continue` here, which skipped every rule-labelled question and
                    # made the correct_rule/incorrect_rule branches below
                    # unreachable — the "correct for rules" tally was always 0 / 0.
                    if qa['high_low'] == 'overview' or qa[
                            'high_low'] == 'conceptual':
                        _, preds, _ = model({
                            'question': [process(qa['question'])],
                            'class': torch.LongTensor([0])
                        })
                        if qa['high_low_mode'] == 'hand' and class_map[
                                preds.item()] == qa['high_low']:
                            correct_hand += 1
                        elif qa['high_low_mode'] == 'rules' and class_map[
                                preds.item()] == qa['high_low']:
                            correct_rule += 1
                        elif qa['high_low_mode'] == 'hand' and class_map[
                                preds.item()] != qa['high_low']:
                            incorrect_hand += 1
                        elif qa['high_low_mode'] == 'rules' and class_map[
                                preds.item()] != qa['high_low']:
                            incorrect_rule += 1
    print("%d / %d correct for hand" % (correct_hand,
                                        correct_hand + incorrect_hand))
    print("%d / %d correct for rules" % (correct_rule,
                                         correct_rule + incorrect_rule))
    print("%d / %d total correct" %
          (correct_rule + correct_hand,
           correct_rule + incorrect_rule + correct_hand + incorrect_hand))
def classify_final(args, device):
    """Score auto-assigned specificity labels against crowd 'high'/'low' tags.

    Reads the crowd-sourced relevance CSV, labels each unseen question via the
    rule-based ``labeller`` (falling back to the ELMo classifier when the rules
    return 'none'), and reports, for 'gold' and 'gen' questions separately, how
    often the automatic label agrees with the crowd tag.
    """
    args.config.batch_size = 1
    config = args.config
    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)
    correct_class = {'gold': 0, 'gen': 0}
    # Deduplicate: each distinct question string is only scored once per source.
    questions_so_far = {'gold': {}, 'gen': {}}
    total = {'gold': 0, 'gen': 0}
    with torch.no_grad():
        model.eval()
        with open(
                'doc2qa/final/final_crowd/results/question_more_relevant.csv',
                'r') as f:
            data = csv.reader(f)
            for i, row in enumerate(data):
                # Skip the header row before inspecting any column values.
                if i == 0:
                    continue
                # BUG FIX: removed the leftover debug print ("yolo") that fired
                # on this filter; the skip itself is preserved.
                if row[8][:3] == 'Zen':
                    continue
                if row[2] == 'golden':
                    continue
                # Columns 12/13 hold the two questions, 16/17 their crowd tags,
                # and column 9 the source ('gold' or 'gen').
                for ques, tag in zip([row[12], row[13]], [row[16], row[17]]):
                    if ques in questions_so_far[row[9]]:
                        continue
                    total[row[9]] += 1
                    auto_label = labeller(ques)
                    if auto_label == 'none':
                        # Rules were inconclusive; fall back to the classifier.
                        _, preds, _ = model({
                            'question': [process(ques)],
                            'class': torch.LongTensor([0])
                        })
                        if preds.item() == 0:
                            auto_label = 'overview'
                        else:
                            auto_label = 'conceptual'
                    questions_so_far[row[9]][ques] = 1
                    # These label families map onto the crowd's 'high' tag;
                    # 'conceptual' maps onto 'low'.
                    if auto_label in [
                            'overview', 'causal', 'instrumental', 'judgemental'
                    ] and tag == 'high':
                        correct_class[row[9]] += 1
                    elif auto_label == 'conceptual' and tag == 'low':
                        correct_class[row[9]] += 1
    print("Gold correct class = %d / %d" % (correct_class['gold'],
                                            total['gold']))
    print("Gen correct class = %d / %d" % (correct_class['gen'],
                                           total['gen']))
def classify(args, device):
    """Resolve 'idk' specificity labels in the dev pickle with the classifier.

    Every question whose ``high_low_mode`` is 'idk' (outside QuAC/CoQA) gets a
    'high_low' label: empty questions default to 'overview', all others are
    labelled by the ELMo classifier. The updated data is written back in place.
    """
    args.config.batch_size = 1
    config = args.config
    model = ELMoClassifier(config, device)
    model.cuda()
    # If the saving directory has no checkpoints, this function will not do anything
    load_weights(model, args.best_dir)

    with open('data/specificity_qa_dataset/dev.pickle', 'rb') as handle:
        instances = pickle.load(handle)

    with torch.no_grad():
        model.eval()
        total = len(instances)
        for idx, instance in enumerate(instances):
            if instance['dataset'] in ('quac', 'coqa'):
                continue
            print("%d / %d" % (idx, total))
            for paragraph in instance['paragraphs']:
                for qa in paragraph['qas']:
                    if qa['high_low_mode'] != 'idk':
                        continue
                    if not qa['question'].strip():
                        # Blank questions default to 'overview'; note the mode
                        # stays 'idk' in this branch, as in the original flow.
                        qa['high_low'] = 'overview'
                        continue
                    _, preds, _ = model({
                        'question': [process(qa['question'])],
                        'class': torch.LongTensor([0])
                    })
                    qa['high_low_mode'] = 'classifier'
                    qa['high_low'] = 'overview' if preds.item() == 0 else 'conceptual'

    with open('data/specificity_qa_dataset/dev.pickle', 'wb') as handle:
        pickle.dump(instances, handle)
type=int, default=50, metavar='N', help='input batch size for testing (default: 50)') parser.add_argument('--lr', type=float, default=0.00001, metavar='LR', help='learning rate (default: 0.00001)') args = parser.parse_args() # load parameters torch.manual_seed(20) # generate random seeds for shuffle dataset device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # get data from folders and txt files trainData, trainGT, trainLabel, testData, testGT, testLabel = process( class_list, rote) print("------------Data Load Finished !!------------") # create my dataset by override torch.utils.data.DataSet train_dataset = MyDataLoader(trainData, trainGT, trainLabel) test_dataset = MyDataLoader(testData, testGT, testLabel) # dataloader to pytorch network train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch, shuffle=True) test_loader = DataLoader(dataset=test_dataset, batch_size=args.test_batch, shuffle=True) eval_loader = DataLoader(dataset=test_dataset, shuffle=False)
import pandas as pd
import torch

import dataloader

if __name__ == "__main__":
    # Persistence baseline: predict that the next (rounded) mood score equals
    # the current one, and report how often that prediction holds on the
    # chronological 80/20 train/test split.
    frame = pd.read_csv("./dataset_mood_smartphone.csv")
    _, targets = dataloader.process(frame)
    targets = torch.round(targets)

    split = round(.8 * len(targets))
    train_targets = targets[:split]
    test_targets = targets[split:]

    # Element i is True when target i+1 repeats target i.
    train_matches = train_targets[:-1] == train_targets[1:]
    test_matches = test_targets[:-1] == test_targets[1:]

    train_accuracy = sum(train_matches) / len(train_matches)
    test_accuracy = sum(test_matches) / len(test_matches)
    print(f"Accuracy on training set: {train_accuracy}")
    print(f"Accuracy on testing set: {test_accuracy}")
import torch
import pandas as pd
from sklearn import svm

import dataloader

if __name__ == "__main__":
    # SVM baseline: predict the (rounded) mood score from the mean of a
    # 2-day feature window, on a chronological 80/20 train/test split.
    data = pd.read_csv("./dataset_mood_smartphone.csv")
    inputs, targets = dataloader.process(data, window_size=2)
    inputs = inputs.mean(dim=1)  # collapse the window to a single feature vector
    targets = torch.round(targets)

    n = len(targets)
    split = round(.8 * n)
    train_inputs, train_targets = inputs[:split], targets[:split]
    test_inputs, test_targets = inputs[split:], targets[split:]

    clf = svm.SVC()
    clf.fit(train_inputs, train_targets)
    train_out = clf.predict(train_inputs)
    test_out = clf.predict(test_inputs)

    train_diff = torch.tensor(train_out) == train_targets
    test_diff = torch.tensor(test_out) == test_targets
    train_accuracy = sum(train_diff) / len(train_diff)
    test_accuracy = sum(test_diff) / len(test_diff)

    print(f"Accuracy on training set: {train_accuracy}")
    # BUG FIX: test_accuracy was computed but never reported (the companion
    # persistence-baseline script prints both splits).
    print(f"Accuracy on testing set: {test_accuracy}")