import h5py
import numpy as np

# NOTE: `nlp` (for applyVocab) and `BTree` are project-local helpers and are
# assumed to be imported elsewhere in this module.


def preprocessAnswers(answers, vocab, fAppend):
    """Map each CLEVR answer to its vocab index and store it in the HDF5 file."""
    ret = []
    for e in answers:
        e = e['answer'].lower().split()
        x = nlp.applyVocab(e, vocab).tolist()
        ret += [x]
    ret = np.asarray(ret)
    with h5py.File('data/preprocessed/clevr.h5', 'a') as f:
        f.create_dataset(fAppend + 'Answers', data=ret)
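
# Illustrative sketch only (assumption): nlp.applyVocab is used above as a
# token-to-index lookup that returns a NumPy array. The project's real helper
# may differ (e.g. in unknown-token handling); this stand-in just shows the
# shape of the call the preprocess* functions rely on.
def _apply_vocab_sketch(tokens, vocab):
    # Map each token string to its integer id in `vocab`.
    return np.asarray([vocab[t] for t in tokens])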

def preprocessQuestions(questions, vocab, fAppend, maxLen=45):
    """Tokenize CLEVR questions, pad them to maxLen, and store the token ids
    together with each question's image index."""
    ret = []
    retImgIdx = []
    for e in questions:
        retImgIdx += [e['image_index']]
        # Lowercase and split off the trailing '?' as its own token.
        e = (e['question'].lower()[:-1] + ' ?').split()
        x = nlp.applyVocab(e, vocab).tolist()
        # Right-pad with 0 (the padding index) up to maxLen.
        x += [0] * (maxLen - len(x))
        ret += [x]
    ret = np.asarray(ret)
    retImgIdx = np.asarray(retImgIdx)
    with h5py.File('data/preprocessed/clevr.h5', 'a') as f:
        f.create_dataset(fAppend + 'Questions', data=ret)
        f.create_dataset(fAppend + 'ImageIdx', data=retImgIdx)
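
# Usage sketch: downstream loaders can read the padded question tokens and
# their image indices back by the dataset names written above. The dataset
# names come from the code above; the 'train' prefix and read-only access
# pattern here are assumptions.
def _load_questions_sketch(fAppend='train'):
    with h5py.File('data/preprocessed/clevr.h5', 'r') as f:
        questions = f[fAppend + 'Questions'][:]  # (numQuestions, maxLen) int array
        imgIdx = f[fAppend + 'ImageIdx'][:]      # (numQuestions,) image indices
    return questions, imgIdx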

def preprocessPrograms(programs, vocab, fAppend, maxLen=45):
    """Flatten CLEVR functional programs, map them to vocab indices, pad them,
    and store them along with their unpadded lengths as a mask."""
    # '_' quick vocab patch: also register each key with its first two
    # characters stripped, so flattened program tokens resolve to the same ids.
    for k in list(vocab.keys()):
        vocab[k[2:]] = vocab[k]
    ret = []
    retMask = []
    for p in programs:
        p = p['program']
        p = BTree(p).flat()
        p = nlp.applyVocab(p, vocab).tolist()
        retMask += [len(p)]
        # Right-pad with 0 up to maxLen (was hard-coded to 45).
        p = [p + (maxLen - len(p)) * [0]]
        ret += p
    ret = np.asarray(ret).astype(np.int64)  # np.int is removed in recent NumPy
    retMask = np.asarray(retMask)
    with h5py.File('data/preprocessed/clevr.h5', 'a') as f:
        f.create_dataset(fAppend + 'Programs', data=ret)
        f.create_dataset(fAppend + 'ProgramMask', data=retMask)
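
# End-to-end usage sketch. Assumptions: the CLEVR JSON path, the 'train'
# prefix, and the stand-in vocab construction below are hypothetical; only
# the three preprocess* functions above come from this file.
if __name__ == '__main__':
    import json

    with open('data/CLEVR_v1.0/questions/CLEVR_train_questions.json') as fp:
        entries = json.load(fp)['questions']

    # Minimal stand-in vocab: index every question/answer token in the split,
    # keeping 0 free as the padding index. The repo presumably builds its
    # vocabs elsewhere; a program vocab (over BTree(...).flat() tokens) would
    # be needed the same way before calling preprocessPrograms.
    vocab = {}
    for e in entries:
        tokens = (e['question'].lower()[:-1] + ' ?').split() + e['answer'].lower().split()
        for tok in tokens:
            vocab.setdefault(tok, len(vocab) + 1)

    preprocessQuestions(entries, vocab, 'train')
    preprocessAnswers(entries, vocab, 'train')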