def gen_txt():
    """Write the question vocabulary and the index-encoded questions to disk.

    Reads the vocabulary JSON at ``q_voc_path`` and the raw questions
    returned by ``VQADataProvider.load_raw_iqa(q_a_i_path)``. Special
    tokens are removed from both. Two text files are written:

    * ``wordlist_path`` -- one vocabulary word per line.
    * ``doc_path`` -- one question per line, as space-separated indices
      into that word list.

    Raises:
        ValueError: if a question contains a word that is not in the
            vocabulary.
    """
    exc_list = {"<break>", "<END>", "<START>", "<UNKNOWN>", "<UNK>"}

    with open(q_voc_path, "r") as f:
        q_dic = json.load(f)

    # Keep the vocabulary order of the JSON file, minus the special tokens.
    word_list = [k for k in q_dic if k not in exc_list]
    # Map each word to its position once instead of scanning the list
    # with list.index() for every token of every question.
    word_to_idx = {word: idx for idx, word in enumerate(word_list)}

    _, raw_ques, _ = VQADataProvider.load_raw_iqa(q_a_i_path)

    sent_idx_list = []
    for ques in raw_ques:
        sent = VQADataProvider.text_to_list(ques)
        try:
            sent_idx_list.append(
                [word_to_idx[x] for x in sent if x not in exc_list])
        except KeyError as err:
            # Same exception type list.index() raised for unknown words.
            raise ValueError(
                "%r is not in the question vocabulary" % (err.args[0],))

    with open(wordlist_path, "w") as f:
        for item in word_list:
            f.write("%s\n" % item)

    with open(doc_path, "w") as f:
        for sent in sent_idx_list:
            f.write(" ".join(str(i) for i in sent) + "\n")
def pred(opt, folder, logger):
    """Run a saved checkpoint on the validation split and log the scores.

    Args:
        opt: options object; ``RESUME_PATH`` and ``VAL_BATCH_SIZE`` are
            read, ``quest_vob_size`` and ``ans_vob_size`` are set on it.
        folder: passed through to ``exec_validation`` as its output folder.
        logger: logger used for progress and result messages.
    """
    assert opt.RESUME_PATH, 'please specify the model file'

    provider = VQADataProvider(
        opt, batchsize=opt.VAL_BATCH_SIZE, mode='val', logger=logger)
    vocab_sizes = provider.get_vocab_size()
    opt.quest_vob_size, opt.ans_vob_size = vocab_sizes

    logger.info('==> Resuming from checkpoint..')
    # Load onto the CPU first; cuda_wrapper is applied to the model afterwards.
    state = torch.load(opt.RESUME_PATH, map_location='cpu')
    net = get_model(opt)
    net.load_state_dict(state['model'])
    net = cuda_wrapper(net)

    results = exec_validation(
        net, opt, mode='val', folder=folder, it=0, visualize=True,
        dp=provider, logger=logger)
    test_loss, acc_overall, _acc_per_ques, acc_per_ans = results

    report = (
        ('Test loss: {}', test_loss),
        ('Accuracy: {}', acc_overall),
        ('Test per ans: {}', acc_per_ans),
    )
    for template, value in report:
        logger.info(template.format(value))
def _collect_question_words(path, banner):
    """Return the set of question words found in a ``|``-separated file.

    The question is taken from the second field of every row and
    tokenised with ``VQADataProvider.text_to_list``; the last token of
    each question is dropped.
    """
    found = set()
    with open(path, "r") as f:
        print(banner)
        for row in f:
            fields = row.rstrip().split("|")
            # presumably the dropped trailing token is an end marker --
            # TODO confirm against text_to_list
            found.update(VQADataProvider.text_to_list(fields[1])[:-1])
    return found


def make_vocab_for_embedding():
    """Build the embedding vocabulary and matrix for the dataset questions.

    Loads the pretrained word vectors, keeps only the words that occur
    in the train/val or test questions, and saves:

    * ``embedding/embed_mapping.pkl`` -- the word list, ``"<PAD>"`` first.
    * ``embedding/embedding_matrix.npy`` -- one 200-wide row per word;
      row 0 (padding) stays all zeros.

    The kept words are sorted so that repeated runs assign the same
    index to the same word (set iteration order is not stable across
    interpreter runs).

    Returns:
        tuple: ``(words, embedding_matrix)``.
    """
    embed_dim = 200

    embedding_path = os.path.join(
        root_path, "embedding/BioWordVec_PubMed_MIMICIII_d200.vec.bin")
    MED = KeyedVectors.load_word2vec_format(embedding_path, binary=True)
    # NOTE(review): `.wv.vocab` looks like the pre-4.0 gensim API --
    # confirm the gensim version this project pins.
    word_dict = MED.wv.vocab
    print(len(word_dict))

    # Gather every question word of the dataset (training + val + test).
    train_val_path = os.path.join(
        root_path, "data/train_val/All_QA_Pairs_train_val.txt")
    test_path = os.path.join(
        root_path, "data/test/VQAMed2019_Test_Questions.txt")
    all_word_in_data = _collect_question_words(
        train_val_path, "process train_val file")
    all_word_in_data |= _collect_question_words(
        test_path, "process test file")

    # Keep only the words that have a pretrained vector.
    print("filter")
    words = [w for w in sorted(all_word_in_data) if w in word_dict]
    print(len(words))

    # Index 0 is reserved for padding.
    words.insert(0, "<PAD>")

    # Save the word list used to map questions to lists of indices.
    save_path = os.path.join(root_path, "embedding/embed_mapping.pkl")
    with open(save_path, "wb") as f:
        pickle.dump(words, f)

    # Save the embedding matrix; the padding row is left as zeros.
    save_path_2 = os.path.join(root_path, "embedding/embedding_matrix.npy")
    embedding_matrix = np.zeros((len(words), embed_dim))
    for i, word in enumerate(words[1:], start=1):
        embedding_matrix[i] = MED[word]
    np.save(save_path_2, embedding_matrix)

    return words, embedding_matrix
def make_vocab_files():
    """Build the question and answer vocabularies from the configured splits.

    Reads ``QUESTION_VOCAB_SPACE``, ``ANSWER_VOCAB_SPACE`` and
    ``NUM_OUTPUT_UNITS`` from the module-level ``opt``.

    Returns:
        tuple: ``(question_vocab, answer_vocab)``.
    """
    # NOTE(review): a second `make_vocab_files` with a different signature
    # appears later in this source; if both live in one module the later
    # one wins -- confirm.
    question_space = opt.QUESTION_VOCAB_SPACE
    print('making question vocab...', question_space)
    question_data, _ = VQADataProvider.load_data(question_space)
    question_vocab = make_question_vocab(question_data)

    answer_space = opt.ANSWER_VOCAB_SPACE
    print('making answer vocab...', answer_space)
    _, answer_data = VQADataProvider.load_data(answer_space)
    answer_vocab = make_answer_vocab(answer_data, opt.NUM_OUTPUT_UNITS)

    return question_vocab, answer_vocab
def make_vocab_files(opt, filename, ques_or_ans):
    """Build a question or answer vocabulary and save it as JSON.

    Args:
        opt: unused; kept so existing callers keep working.
        filename: data file handed to ``VQADataProvider.load_raw_iqa``.
        ques_or_ans: ``"question"`` or ``"answer"``; selects which
            sentences are used and names the output file
            ``vocab/<ques_or_ans>_vocab.json`` under ``root_path``.

    Raises:
        ValueError: if ``ques_or_ans`` is neither ``"question"`` nor
            ``"answer"``.
    """
    # Reject bad selectors up front; previously this fell through with
    # sentence_ls = None and failed later with an unrelated TypeError.
    if ques_or_ans not in ("question", "answer"):
        raise ValueError(
            "ques_or_ans must be 'question' or 'answer', got %r"
            % (ques_or_ans,))

    save_path = os.path.join(root_path, "vocab/%s_vocab.json" % ques_or_ans)

    # load data
    _, questions, answers = VQADataProvider.load_raw_iqa(filename)
    sentence_ls = questions if ques_or_ans == "question" else answers
    vocab_dict = make_vocab(sentence_ls)

    # save to json file
    with open(save_path, "w") as f:
        json.dump(vocab_dict, f)
    print("%s-%s vocabulary saved" % (filename, ques_or_ans))
def check_ans_vocab(filename):
    """Count how often each word occurs in the answers of a data file.

    Args:
        filename: data file handed to ``VQADataProvider.load_raw_iqa``.

    Returns:
        list: ``(word, count)`` pairs, most frequent first; words with
        equal counts keep first-seen order.
    """
    _, _, answers = VQADataProvider.load_raw_iqa(filename)

    counts = {}
    for answer in answers:
        for token in VQADataProvider.text_to_list(answer):
            counts[token] = counts.get(token, 0) + 1

    ranked = list(counts.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
def test():
    """Score the pickled tf-idf + classifier pair on the validation questions.

    Prints the validation accuracy and the validation confusion matrix.
    """

    def _unpickle(rel_path):
        # NOTE: pickle.load runs code embedded in the file; only use
        # model files from a trusted source.
        with open(os.path.join(root_path, rel_path), "rb") as fh:
            return pickle.load(fh)

    # tf-idf vectorizer
    vectorizer = _unpickle("models/pretrained/tfidf.pkl")

    # validation data
    val_ques, val_lab = VQADataProvider.label_ques("val")
    val_features = vectorizer.transform(val_ques)

    # classifier
    clf = _unpickle("models/pretrained/pretrained_svm.pkl")

    # accuracy
    accuracy = clf.score(val_features, val_lab)
    print("validation accuracy")
    print(accuracy)

    # confusion matrix
    predictions = clf.predict(val_features)
    matrix = confusion_matrix(val_lab, predictions)
    print("validation confusion matrix")
    print(matrix)
def make_vocab(sentence_ls, vocab_size=-1):
    """Map every word of the given sentences to an integer index.

    Words are ranked by frequency (ties keep first-seen order), optionally
    cut to the ``vocab_size`` most frequent, and then numbered in reverse
    rank order, so the most frequent word receives the highest index.

    Args:
        sentence_ls: iterable of sentences, tokenised with
            ``VQADataProvider.text_to_list``.
        vocab_size: maximum number of words; ``-1`` keeps all of them.

    Returns:
        dict: word -> index.
    """
    counts = {}
    for sentence in sentence_ls:
        for token in VQADataProvider.text_to_list(sentence):
            counts[token] = counts.get(token, 0) + 1

    # Most frequent first; sorted() is stable, so ties stay in first-seen order.
    ranked = sorted(counts, key=counts.get, reverse=True)
    if vocab_size != -1 and vocab_size <= len(ranked):
        ranked = ranked[:vocab_size]

    return {word: idx for idx, word in enumerate(reversed(ranked))}
def make_ans_vocab_file(opt, filename):
    """Build the answer vocabulary list and pickle it.

    Args:
        opt: unused by this function.
        filename: data file handed to ``VQADataProvider.load_raw_iqa``.

    Returns:
        int: number of entries in the saved vocabulary.
    """
    target = os.path.join(root_path, "vocab/answer_vocab.pkl")

    # load data
    _, _, answers = VQADataProvider.load_raw_iqa(filename)
    answer_vocab = make_vocab_ans(answers)

    with open(target, "wb") as out:
        pickle.dump(answer_vocab, out)

    # for debug
    print(answer_vocab[:10])
    return len(answer_vocab)
def check_len(filename):
    """Build length histograms for the questions and answers of a data file.

    Each row is split on ``|``; field 1 is the question and field 2 the
    answer. Both are tokenised with ``VQADataProvider.text_to_list`` and
    the special tokens are not counted.

    Args:
        filename: path of the ``|``-separated data file.

    Returns:
        tuple: ``(q_len_num, a_len_num)``, each a dict mapping a word
        count to the number of rows with that count.
    """
    skipped = ("<break>", "<START>", "<END>", "<UNKNOWN>")
    q_len_num, a_len_num = {}, {}

    def _tally(text, histogram):
        # Count the non-special tokens and bump that length's bucket.
        tokens = VQADataProvider.text_to_list(text)
        n_words = sum(1 for token in tokens if token not in skipped)
        histogram[n_words] = histogram.get(n_words, 0) + 1

    with open(filename, "r") as f:
        for row in f:
            qa = row.rstrip().split("|")
            _tally(qa[1], q_len_num)
            _tally(qa[2], a_len_num)

    return q_len_num, a_len_num
def __getitem__(self, index):
    """Return the transformed image and its label for one dataset entry.

    Args:
        index: position in ``self.img_lab_list``.

    Returns:
        tuple: ``(img_vec, label)`` where ``img_vec`` is the output of
        ``self.transformations`` and ``label`` is returned as stored.
    """
    img_name, label = self.img_lab_list[index]
    picture = Image.open(os.path.join(self.img_folder, img_name))
    picture = picture.convert('RGB')

    # Images whose size[0] / size[1] ratio falls outside the configured
    # band are passed through adjust_img (intended to bring long images
    # closer to square).
    threshold = self.opt.IMG_RATIO_THRESHOLD
    ratio = picture.size[0] / picture.size[1]
    if ratio > threshold or ratio < (1 / threshold):
        picture = VQADataProvider.adjust_img(picture)

    return self.transformations(picture), label
def make_question_vocab(qdic):
    """Return a dictionary that maps question words to indices.

    Index 0 is reserved for the empty string; every other word gets the
    next free index in order of first appearance.

    Args:
        qdic: mapping of question id -> dict with a ``'qstr'`` entry
            holding the question text.

    Returns:
        dict: word -> index.
    """
    vdict = {'': 0}
    for entry in qdic.values():
        # sequence to list
        q_list = VQADataProvider.seq_to_list(entry['qstr'])
        for w in q_list:
            # `in` replaces dict.has_key(), which does not exist in Python 3.
            if w not in vdict:
                # Indices are dense, so the next free one is the current size.
                vdict[w] = len(vdict)
    return vdict
def train():
    """Fit a tf-idf + SVC question classifier on the train_val questions.

    Prints the accuracy on the training data and pickles both fitted
    objects under ``models/pretrained`` in ``root_path``.

    Returns:
        tuple: ``(vectorizer, classifier)``, both fitted.
    """

    def _dump(obj, rel_path):
        with open(os.path.join(root_path, rel_path), "wb") as fh:
            pickle.dump(obj, fh)

    # data
    questions, labels = VQADataProvider.label_ques("train_val")
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(questions)

    # classifier
    classifier = SVC(gamma="scale", verbose=False)
    classifier.fit(features, labels)

    # accuracy on the data the model was fitted on
    train_acc = classifier.score(features, labels)
    print("training accuracy: ", train_acc)

    # persist the tf-idf vectorizer, then the classifier
    _dump(vectorizer, "models/pretrained/tfidf_train_val.pkl")
    _dump(classifier, "models/pretrained/pretrained_svm_train_val.pkl")

    return vectorizer, classifier
def make_vocab_ans(sentence_ls, vocab_size=-1):
    """Return the answer vocabulary as a list ending with ``"<UNK>"``.

    Words are ranked by frequency, most frequent first (ties keep
    first-seen order). When a limit is given, one slot is reserved for
    ``"<UNK>"`` so the returned list has at most ``vocab_size`` entries
    (a limit of 0 still yields the single ``"<UNK>"`` entry).

    Args:
        sentence_ls: iterable of answers, tokenised with
            ``VQADataProvider.text_to_list``.
        vocab_size: maximum list length including ``"<UNK>"``; any
            negative value (default ``-1``) means no limit.

    Returns:
        list: vocabulary words followed by ``"<UNK>"``.
    """
    word_fre_dic = {}
    for sent in sentence_ls:
        for word in VQADataProvider.text_to_list(sent):
            word_fre_dic[word] = word_fre_dic.get(word, 0) + 1

    # sort, most frequent first
    vocab_ls = sorted(word_fre_dic, key=word_fre_dic.get, reverse=True)

    if 0 <= vocab_size <= len(vocab_ls):
        # Clamp the end so vocab_size == 0 does not become the slice
        # [:-1], which would drop only the last word.
        vocab_ls = vocab_ls[:max(vocab_size - 1, 0)]

    # add <unknown>
    vocab_ls.append("<UNK>")
    return vocab_ls
def label_img_with_ques_etm():
    """Assign each training image the dominant topic of its questions.

    Groups the questions of ``data/train/All_QA_Pairs_train.txt`` by
    image (field 0 is the image, field 1 the question), sums the
    ``etm_topic_distrib`` vectors of every question of an image, and
    keeps the index of the largest summed entry.

    Returns:
        dict: image -> topic index (a Python scalar).
    """
    opt = config.parse_opt()
    q_i_a_path = os.path.join(root_path, "data/train/All_QA_Pairs_train.txt")

    questions_by_img = {}
    with open(q_i_a_path, "r") as f:
        for row in f:
            fields = row.strip().split("|")
            img, ques = fields[0], fields[1]
            questions_by_img.setdefault(img, []).append(ques)

    img_topic_dict = {}
    for img, questions in questions_by_img.items():
        totals = np.zeros(opt.ETM_TOP_NUM)
        for question in questions:
            tokens = VQADataProvider.text_to_list(question)
            totals = np.add(totals, etm_topic_distrib(tokens))
        img_topic_dict[img] = np.argmax(totals).item()

    return img_topic_dict
for row in f: val_img_ids.append(row.strip()) print("train+val images number: ", len(train_img_ids) + len(val_img_ids)) comb_ids = train_img_ids + val_img_ids comb_ids = set(comb_ids) print("unique total images number: ", len(comb_ids)) if __name__ == "__main__": filename = os.path.join( root_path, "data/train_val/QAPairsByCategory/C4_Abnormality_train_val.txt") _, _, ans_set = load_all2set(filename) unq_words = set() for ans in ans_set: words = VQADataProvider.text_to_list(ans) for w in words: unq_words.add(w) print(len(unq_words)) print(list(unq_words)[:5]) print("<END>" in unq_words) # filename = "/Users/leishi/Desktop/Internship/vqa2019/ImageClef-2019-VQA-Med-Training/QAPairsByCategory/C4_Abnormality_train.txt" # all_ans, all_img, all_ques, all_q_i_pairs, all_a_a_pairs = check_raw_data(filename) # print("\nunique answer length: ", len(all_ans)) # print("\nunique img length: ", len(all_img)) # print("\nunique question length: ", len(all_ques)) # # print("\nall answer") # # print(all_ans) # print("\nunique question-image pairs: ", len(all_q_i_pairs)) # print("\nunique question-answer pairs: ", len(all_a_a_pairs))
def exec_validation(model, opt, mode, folder, it, logger, visualize=False, dp=None):
    """Run the model over one pass of a data split and save the predictions.

    Predictions are written as JSON into ``folder``; for ``mode == 'val'``
    they are also scored with ``VQAEval``.

    Args:
        model: the network, or a ``(binary_model, answer_model)`` pair
            when ``opt.LATE_FUSION`` is set.
        opt: options object (``LATE_FUSION``, ``BINARY``, ``OCR``,
            ``EMBED``, ``MAX_ANSWER_VOCAB_SIZE`` and others are read).
        mode: ``'val'``, ``'test-dev'`` or ``'test'``; no loss is
            computed in the two test modes.
        folder: output directory; created through ``check_mkdir``.
        it: iteration number used in the test result file names.
        logger: logger used for progress messages.
        visualize: when true, also write ``visualize.json`` holding
            per-question details.
        dp: optional data provider; a new ``VQADataProvider`` is built
            when it is falsy.

    Returns:
        tuple: ``(avg_loss, acc_overall, acc_perQuestionType,
        acc_perAnswerType)``. All four are ``None`` unless
        ``mode == 'val'``.
    """
    if opt.LATE_FUSION:
        criterion = nn.BCELoss()
        model_prob = model[1]
        model = model[0]
    else:
        criterion = nn.NLLLoss()
    check_mkdir(folder)
    model.eval()
    if opt.BINARY:
        criterion2 = nn.BCELoss()
        acc_counter = 0
        all_counter = 0
    if not dp:
        dp = VQADataProvider(opt, batchsize=opt.VAL_BATCH_SIZE, mode=mode, logger=logger)
    epoch = 0
    pred_list = []
    loss_list = []
    stat_list = []
    total_questions = len(dp.getQuesIds())
    percent_counter = 0

    logger.info('Validating...')
    # The provider reports a non-zero epoch once the split is exhausted.
    while epoch == 0:
        (data, word_length, img_feature, answer, embed_matrix, ocr_length,
         ocr_embedding, ocr_tokens, ocr_answer_flags, qid_list, iid_list,
         epoch) = dp.get_batch_vec()
        data = cuda_wrapper(Variable(torch.from_numpy(data))).long()
        word_length = cuda_wrapper(torch.from_numpy(word_length))
        img_feature = cuda_wrapper(Variable(torch.from_numpy(img_feature))).float()
        label = cuda_wrapper(Variable(torch.from_numpy(answer)))
        ocr_answer_flags = cuda_wrapper(torch.from_numpy(ocr_answer_flags))

        # Forward pass; the call signature depends on the model variant.
        if opt.OCR:
            embed_matrix = cuda_wrapper(Variable(torch.from_numpy(embed_matrix))).float()
            ocr_length = cuda_wrapper(torch.from_numpy(ocr_length))
            ocr_embedding = cuda_wrapper(Variable(torch.from_numpy(ocr_embedding))).float()
            if opt.BINARY:
                ocr_answer_flags = cuda_wrapper(ocr_answer_flags)
                if opt.LATE_FUSION:
                    binary = model(data, img_feature, embed_matrix, ocr_length, ocr_embedding, mode)
                    pred = model_prob(data, img_feature, embed_matrix, ocr_length, ocr_embedding, mode)
                    # Columns below MAX_ANSWER_VOCAB_SIZE form pred1,
                    # the remaining columns form pred2.
                    pred1 = pred[:, 0:opt.MAX_ANSWER_VOCAB_SIZE]
                    pred2 = pred[:, opt.MAX_ANSWER_VOCAB_SIZE:]
                else:
                    binary, pred1, pred2 = model(data, img_feature, embed_matrix, ocr_length, ocr_embedding, mode)
            else:
                pred = model(data, img_feature, embed_matrix, ocr_length, ocr_embedding, mode)
        elif opt.EMBED:
            embed_matrix = cuda_wrapper(Variable(torch.from_numpy(embed_matrix))).float()
            pred = model(data, img_feature, embed_matrix, mode)
        else:
            pred = model(data, word_length, img_feature, mode)

        # Loss is only computed outside the test modes.
        if mode == 'test-dev' or mode == 'test':
            pass
        else:
            if opt.BINARY:
                if opt.LATE_FUSION:
                    loss = criterion(binary, ocr_answer_flags.float())
                else:
                    loss = criterion2(binary, ocr_answer_flags.float()) * opt.BIN_LOSS_RATE
                    in_vocab = label < opt.MAX_ANSWER_VOCAB_SIZE
                    in_ocr = label >= opt.MAX_ANSWER_VOCAB_SIZE
                    loss += criterion(pred1[in_vocab], label[in_vocab].long())
                    loss += criterion(pred2[in_ocr],
                                      label[in_ocr].long() - opt.MAX_ANSWER_VOCAB_SIZE)
                all_counter += binary.size()[0]
                # A binary prediction is correct when thresholding at 0.5
                # reproduces the flag.
                acc_counter += torch.sum((binary <= 0.5) * (ocr_answer_flags == 0)
                                         + (binary > 0.5) * (ocr_answer_flags == 1))
            else:
                loss = criterion(pred, label.long())
            loss = (loss.data).cpu().numpy()
            loss_list.append(loss)

        if opt.BINARY:
            binary = (binary.data).cpu().numpy()
            pred1 = (pred1.data).cpu().numpy()
            pred2 = (pred2.data).cpu().numpy()
            pred = np.hstack([pred1, pred2])
        else:
            pred = (pred.data).cpu().numpy()

        if opt.OCR:
            # select the largest index within the ocr length boundary
            ocr_mask = np.fromfunction(
                lambda i, j: j >= (ocr_length[i].cpu().numpy() + opt.MAX_ANSWER_VOCAB_SIZE),
                pred.shape, dtype=int)
            if opt.BINARY:
                # Also mask the half of the columns that contradicts the
                # ground-truth OCR flag of each row.
                ocr_mask += np.fromfunction(
                    lambda i, j: np.logical_or(
                        np.logical_and(ocr_answer_flags[i].cpu().numpy() == 0,
                                       j >= opt.MAX_ANSWER_VOCAB_SIZE),
                        np.logical_and(ocr_answer_flags[i].cpu().numpy() == 1,
                                       j < opt.MAX_ANSWER_VOCAB_SIZE)),
                    pred.shape, dtype=int)
            masked_pred = np.ma.array(pred, mask=ocr_mask)
            pred_max = np.ma.argmax(masked_pred, axis=1)
            pred_str = [dp.vec_to_answer_ocr(pred_symbol, ocr)
                        for pred_symbol, ocr in zip(pred_max, ocr_tokens)]
        else:
            pred_max = np.argmax(pred, axis=1)
            pred_str = [dp.vec_to_answer(pred_symbol) for pred_symbol in pred_max]

        for qid, iid, ans, pred_ans, ocr in zip(qid_list, iid_list, answer.tolist(),
                                                pred_str, ocr_tokens):
            pred_list.append((pred_ans, int(dp.getStrippedQuesId(qid))))
            # prepare pred json file
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid), opt.MAX_QUESTION_LENGTH)
                if mode == 'test-dev' or mode == 'test':
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    if opt.OCR:
                        ans_str = dp.vec_to_answer_ocr(int(ans), ocr)
                    else:
                        ans_str = dp.vec_to_answer(int(ans))
                    ans_list = [dp.getAnsObj(qid)[i]['answer'] for i in range(10)]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred_ans,
                    'ocr_tokens': ocr
                })

        # Progress output, refreshed at most every 5 percentage points.
        percent = 100 * float(len(pred_list)) / total_questions
        if percent <= 100 and percent - percent_counter >= 5:
            percent_counter = percent
            sys.stdout.write('\r' + ('%.2f' % percent) + '%')
            sys.stdout.flush()

    if visualize:
        with open(os.path.join(folder, 'visualize.json'), 'w') as f:
            json.dump(stat_list, f, indent=4, sort_keys=True)

    # The counters are only updated outside the test modes; skip the report
    # when nothing was counted (acc_counter is then still the integer 0).
    if opt.BINARY and all_counter:
        logger.info('Binary Acc: {},({}/{})'.format(
            acc_counter.item() / all_counter, acc_counter, all_counter))

    # Keep only the first prediction for every question id.
    logger.info('Deduping arr of len {}'.format(len(pred_list)))
    deduped = []
    seen = set()
    for ans, qid in pred_list:
        if qid not in seen:
            seen.add(qid)
            deduped.append((ans, qid))
    logger.info('New len {}'.format(len(deduped)))
    final_list = [{u'answer': ans, u'question_id': qid} for ans, qid in deduped]

    # Defaults for the modes that produce no loss/accuracy; previously the
    # return statement raised UnboundLocalError outside 'val'.
    avg_loss = acc_overall = acc_perQuestionType = acc_perAnswerType = None

    if mode == 'val':
        avg_loss = np.array(loss_list).mean()
        valFile = os.path.join(folder, 'val2015_resfile')
        with open(valFile, 'w') as f:
            json.dump(final_list, f)
        exp_type = opt.EXP_TYPE
        annFile = config.DATA_PATHS[exp_type]['val']['ans_file']
        quesFile = config.DATA_PATHS[exp_type]['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
    elif mode == 'test-dev':
        filename = os.path.join(folder, 'test-dev_results_' + str(it).zfill(8))
        with open(filename + '.json', 'w') as f:
            json.dump(final_list, f)
    elif mode == 'test':
        filename = os.path.join(folder, 'test_results_' + str(it).zfill(8))
        with open(filename + '.json', 'w') as f:
            json.dump(final_list, f)

    return avg_loss, acc_overall, acc_perQuestionType, acc_perAnswerType