def store_qas(dataset, qas, vocab, max_length=20):
    """Write question/answer pairs into an already-open h5py file or group.

    Args:
        dataset: Open h5py File/Group in which the datasets are created.
        qas: Iterable of dicts with keys 'image_id', 'category',
            'question' and 'answer'.
        vocab: Vocabulary object understood by process_text.
        max_length: Maximum encoded length of questions and answers.

    Returns:
        List of unique image ids in order of first appearance; the
        'image_indices' dataset holds indices into this list.
    """
    total = len(qas)
    # BUG FIX: the original used the global `args.max_length` here, silently
    # ignoring the `max_length` parameter; use the parameter instead.
    questions = dataset.create_dataset(
        'questions', (total, max_length), dtype='i')
    answers = dataset.create_dataset(
        'answers', (total, max_length), dtype='i')
    categories = dataset.create_dataset('categories', (total,), dtype='i')
    image_indices = dataset.create_dataset('image_indices', (total,), dtype='i')
    image_ids = []
    id_to_index = {}  # O(1) dedup instead of repeated list.index scans.
    bar = progressbar.ProgressBar(maxval=total)
    for idx, entry in enumerate(qas):
        image_id = entry['image_id']
        if image_id in id_to_index:
            i_image = id_to_index[image_id]
        else:
            i_image = len(image_ids)
            id_to_index[image_id] = i_image
            image_ids.append(image_id)
        image_indices[idx] = i_image
        categories[idx] = entry['category']
        q, length = process_text(entry['question'].encode('utf-8'), vocab,
                                 max_length=max_length)
        questions[idx, :length] = q
        a, length = process_text(entry['answer'].encode('utf-8'), vocab,
                                 max_length=max_length)
        answers[idx, :length] = a
        bar.update(idx)
    return image_ids
def save_dataset(dataset, vocab, output, max_length=20):
    """Read a TSV of (id, sentence, label) rows and write them to an hdf5 file.

    Args:
        dataset: Path to a tab-separated file whose first row is a header;
            column 1 holds the sentence and column 2 the label.
        vocab: Path to the vocabulary file (loaded via load_vocab).
        output: Path of the hdf5 file to create.
        max_length: Maximum encoded sentence length.
    """
    sentences = []
    labels = []
    with open(dataset) as questions_file:
        csv_reader = csv.reader(questions_file, delimiter='\t')
        for line_count, row in enumerate(csv_reader):
            if line_count == 0:
                # Header row — skip. (The original collected the column
                # names into a list it never used.)
                continue
            sentences.append(row[1])
            labels.append(row[2])
    total_sentences = len(sentences)
    vocab = load_vocab(vocab)
    # BUG FIX: the original passed total_sentences as a second argument to
    # print() instead of %-formatting it into the message.
    print('Number of sentences to be written: %d' % total_sentences)
    h5file = h5py.File(output, "w")
    d_questions = h5file.create_dataset(
        "sentences", (total_sentences, max_length), dtype='i')
    d_labels = h5file.create_dataset(
        "labels", (total_sentences,), dtype='i')
    bar = progressbar.ProgressBar(maxval=total_sentences)
    for q_index, (sentence, label) in enumerate(zip(sentences, labels)):
        # BUG FIX: honor the max_length parameter (original hard-coded 20).
        q, length = process_text(sentence, vocab, max_length=max_length)
        d_questions[q_index, :length] = q
        if label not in ('yesno', 'factoid', 'list', 'summary'):
            # Unexpected label; process_label is still attempted, matching
            # the original best-effort behavior.
            print('error')
        d_labels[q_index] = int(process_label(label))
        bar.update(q_index + 1)
    h5file.close()
def save_dataset(image_dir, questions, annotations, vocab, ans2cat, output,
                 im_size=224, max_q_length=20, max_a_length=4,
                 with_answers=False):
    """Saves the Visual Genome images and the questions in a hdf5 file.

    Args:
        image_dir: Directory with all the images.
        questions: Location of the questions.
        annotations: Location of all the annotations.
        vocab: Location of the vocab file.
        ans2cat: Mapping from answers to category.
        output: Location of the hdf5 file to save to.
        im_size: Size of image.
        max_q_length: Maximum length of the questions.
        max_a_length: Maximum length of the answers.
        with_answers: Whether to also save the answers.
    """
    # Load the data.
    vocab = load_vocab(vocab)
    with open(annotations) as f:
        annos = json.load(f)
    with open(questions) as f:
        questions = json.load(f)
    with open(ans2cat) as f:
        ans2cat = json.load(f)

    # Get the mappings from qid to answers.
    qid2ans, image_ids = create_answer_mapping(annos, ans2cat)
    total_questions = len(qid2ans)
    total_images = len(image_ids)
    # BUG FIX: converted Python 2 print statements to print() calls for
    # Python 3 compatibility and consistency with the rest of the file.
    print("Number of images to be written: %d" % total_images)
    print("Number of QAs to be written: %d" % total_questions)

    h5file = h5py.File(output, "w")
    d_questions = h5file.create_dataset(
        "questions", (total_questions, max_q_length), dtype='i')
    d_indices = h5file.create_dataset(
        "image_indices", (total_questions,), dtype='i')
    d_images = h5file.create_dataset(
        "images", (total_images, im_size, im_size, 3), dtype='f')
    d_answers = h5file.create_dataset(
        "answers", (total_questions, max_a_length), dtype='i')
    d_answer_types = h5file.create_dataset(
        "answer_types", (total_questions,), dtype='i')

    # Create the transforms we want to apply to every image.
    transform = transforms.Compose([
        transforms.Resize((im_size, im_size))])

    # Iterate and save all the questions and images.
    bar = progressbar.ProgressBar(maxval=total_questions)
    i_index = 0
    q_index = 0
    done_img2idx = {}
    for entry in questions['questions']:
        image_id = entry['image_id']
        question_id = entry['question_id']
        if image_id not in image_ids:
            continue
        if question_id not in qid2ans:
            continue
        if image_id not in done_img2idx:
            # Try the plain "<id>.jpg" name first, then fall back to the
            # zero-padded COCO-style "%012d.jpg" file name.
            try:
                path = "%d.jpg" % image_id
                image = Image.open(os.path.join(image_dir, path)).convert('RGB')
            except IOError:
                path = "%012d.jpg" % image_id
                image = Image.open(os.path.join(image_dir, path)).convert('RGB')
            image = transform(image)
            d_images[i_index, :, :, :] = np.array(image)
            done_img2idx[image_id] = i_index
            i_index += 1
        q, length = process_text(entry['question'], vocab,
                                 max_length=max_q_length)
        d_questions[q_index, :length] = q
        answer = qid2ans[question_id]
        a, length = process_text(answer, vocab, max_length=max_a_length)
        d_answers[q_index, :length] = a
        d_answer_types[q_index] = ans2cat[answer]
        d_indices[q_index] = done_img2idx[image_id]
        q_index += 1
        bar.update(q_index)
    h5file.close()
    print("Number of images written: %d" % i_index)
    print("Number of QAs written: %d" % q_index)
def save_dataset(image_dir, questions_path, vocab, output, im_size=224,
                 max_q_length=20, max_a_length=6, max_c_length=20,
                 with_answers=False):
    """Saves the Visual Genome images and the questions in a hdf5 file.

    Also dumps a JSON list of [image_id, image_index] pairs, one per
    written QA, to data/vqa/results_image_v.json.

    Args:
        image_dir: Directory with all the images.
        questions_path: Location of the questions JSON file.
        vocab: Location of the vocab file.
        output: Location of the hdf5 file to save to.
        im_size: Size of image.
        max_q_length: Maximum length of the questions.
        max_a_length: Maximum length of the answers.
        max_c_length: Unused here; kept for interface compatibility.
        with_answers: Whether to also save the answers.
    """
    # Load the data.
    vocab = load_vocab(vocab)
    with open(questions_path) as f:
        questions = json.load(f)

    # Get the mappings from qid to answers.
    qid2ans, image_ids = create_questions_images_ids(questions)
    total_questions = len(qid2ans)
    total_images = len(image_ids)
    print("Number of images to be written: %d" % total_images)
    print("Number of QAs to be written: %d" % total_questions)

    h5file = h5py.File(output, "w")
    d_questions = h5file.create_dataset(
        "questions", (total_questions, max_q_length), dtype='i')
    d_indices = h5file.create_dataset(
        "image_indices", (total_questions,), dtype='i')
    d_images = h5file.create_dataset(
        "images", (total_images, im_size, im_size, 3), dtype='f')
    d_answers = h5file.create_dataset(
        "answers", (total_questions, max_a_length), dtype='i')

    # Create the transforms we want to apply to every image.
    transform = transforms.Compose([
        transforms.Resize((im_size, im_size))])

    # Iterate and save all the questions and images.
    bar = progressbar.ProgressBar(maxval=total_questions)
    i_index = 0
    q_index = 0
    done_img2idx = {}
    images_ids = []  # [[image_id, image_index], ...] dumped to JSON below.
    for entry in questions:
        image_id = entry['image_id']
        question_id = entry['question_id']
        if image_id not in image_ids:
            continue
        if question_id not in qid2ans:
            continue
        if image_id not in done_img2idx:
            # Image ids may or may not already carry an extension: try
            # "<id>.jpg" first, then fall back to the raw id as a filename.
            try:
                image = Image.open(
                    os.path.join(image_dir, image_id + ".jpg")).convert('RGB')
            except IOError:
                image = Image.open(
                    os.path.join(image_dir, image_id)).convert('RGB')
            image = transform(image)
            d_images[i_index, :, :, :] = np.array(image)
            done_img2idx[image_id] = i_index
            i_index += 1
        q, length = process_text(entry['question'], vocab,
                                 max_length=max_q_length)
        d_questions[q_index, :length] = q
        a, length = process_text(entry['answer'], vocab,
                                 max_length=max_a_length)
        d_answers[q_index, :length] = a
        d_indices[q_index] = done_img2idx[image_id]
        images_ids.append([image_id, int(d_indices[q_index])])
        q_index += 1
        bar.update(q_index)
    h5file.close()
    print("Number of images written: %d" % i_index)
    print("Number of QAs written: %d" % q_index)
    # NOTE(review): output directory is hard-coded; consider deriving it
    # from `output` instead of assuming data/vqa/ exists.
    with open(os.path.join('data/vqa/', 'results_image_v.json'),
              'w') as results_file:
        json.dump(images_ids, results_file)