示例#1
0
def preprocessAnswers(answers, vocab, fAppend):
   """Encode answers with ``vocab`` and store them in the CLEVR HDF5 file.

   Every element of ``answers`` is indexed with ``'answer'``; that string is
   lower-cased, split on whitespace and passed to ``nlp.applyVocab``. The
   encoded rows are collected into one array and written as the dataset
   ``fAppend + 'Answers'`` in ``data/preprocessed/clevr.h5``, which is
   opened in append mode.

   Args:
      answers: Iterable of mappings that each provide an ``'answer'`` string.
      vocab: Vocabulary forwarded unchanged to ``nlp.applyVocab``.
      fAppend: Prefix for the dataset name.
   """
   encoded = [
      nlp.applyVocab(entry['answer'].lower().split(), vocab).tolist()
      for entry in answers
   ]

   # Build the array before touching the file so a conversion failure
   # leaves the HDF5 file unopened.
   answerArray = np.asarray(encoded)
   with h5py.File('data/preprocessed/clevr.h5', 'a') as h5:
      h5.create_dataset(fAppend+'Answers', data=answerArray)
示例#2
0
def preprocessQuestions(questions, vocab, fAppend, maxLen=45):
   """Encode questions, zero-pad them and store them in the CLEVR HDF5 file.

   For each element of ``questions`` the ``'image_index'`` value is recorded,
   and the ``'question'`` string is lower-cased, stripped of its final
   character and given a ``' ?'`` suffix before being split on whitespace
   (presumably so the trailing question mark becomes its own token -- this
   assumes every question ends with a single ``?``). The tokens are encoded
   with ``nlp.applyVocab`` and right-padded with zeros up to ``maxLen``.

   Two datasets are written to ``data/preprocessed/clevr.h5`` (opened in
   append mode): ``fAppend + 'Questions'`` and ``fAppend + 'ImageIdx'``.

   Args:
      questions: Iterable of mappings with ``'question'`` and
         ``'image_index'`` entries.
      vocab: Vocabulary forwarded unchanged to ``nlp.applyVocab``.
      fAppend: Prefix for the dataset names.
      maxLen: Length each encoded question is padded to. Questions that
         encode to more than ``maxLen`` ids are not truncated.
   """
   encodedRows = []
   imageIndices = []
   for entry in questions:
      imageIndices.append(entry['image_index'])
      tokens = (entry['question'].lower()[:-1] + ' ?').split()
      ids = nlp.applyVocab(tokens, vocab).tolist()
      # A negative count yields an empty list, so long rows stay unpadded.
      padding = [0] * (maxLen - len(ids))
      encodedRows.append(ids + padding)

   questionArray = np.asarray(encodedRows)
   imageIdxArray = np.asarray(imageIndices)
   with h5py.File('data/preprocessed/clevr.h5', 'a') as h5:
      h5.create_dataset(fAppend+'Questions', data=questionArray)
      h5.create_dataset(fAppend+'ImageIdx', data=imageIdxArray)
def preprocessPrograms(programs, vocab, fAppend, maxLen=45):
   """Encode programs, zero-pad them and store them in the CLEVR HDF5 file.

   Each element of ``programs`` is indexed with ``'program'``; the value is
   wrapped in ``BTree`` and flattened with ``.flat()``, then encoded with
   ``nlp.applyVocab``. The unpadded length of every encoded program is
   recorded, and the program itself is right-padded with zeros to ``maxLen``.

   Two datasets are written to ``data/preprocessed/clevr.h5`` (opened in
   append mode): ``fAppend + 'Programs'`` (integer array of padded programs)
   and ``fAppend + 'ProgramMask'`` (the unpadded lengths).

   NOTE: ``vocab`` is modified in place -- every existing key is also
   registered under the same key with its first two characters removed.

   Args:
      programs: Iterable of mappings that each provide a ``'program'`` entry.
      vocab: Mutable mapping used by ``nlp.applyVocab``; patched in place.
      fAppend: Prefix for the dataset names.
      maxLen: Length each encoded program is padded to. Programs that encode
         to more than ``maxLen`` ids are not truncated.
   """
   #'_' quick vocab patch: alias each key without its two-character prefix.
   # Iterate over a snapshot of the keys because the dict grows in the loop.
   for k in list(vocab.keys()):
      vocab[k[2:]] = vocab[k]

   ret = []
   retMask = []
   for p in programs:
      tokens = BTree(p['program']).flat()
      ids = nlp.applyVocab(tokens, vocab).tolist()
      retMask.append(len(ids))
      # Pad to maxLen; this used to be a hard-coded 45, which silently
      # ignored the maxLen argument.
      ret.append(ids + (maxLen - len(ids)) * [0])

   # np.int was only an alias for the builtin int and was removed in
   # NumPy 1.24, so use the builtin directly.
   ret = np.asarray(ret).astype(int)
   retMask = np.asarray(retMask)
   with h5py.File('data/preprocessed/clevr.h5', 'a') as f:
      f.create_dataset(fAppend+'Programs', data=ret)
      f.create_dataset(fAppend+'ProgramMask', data=retMask)