Exemplo n.º 1
0
 def sample_pairs(self, gr, batch_size, shuffle=True, once=False):
     """Yield the dataset ``gr`` one batch at a time.

     The sample count is read from ``len(gr['si03d'])`` and split into
     ``ceil(count / batch_size)`` consecutive index ranges; each range is
     handed to ``graph_input_slice`` to build one batch.

     Args:
         gr: Dataset container indexed by input name; the sample count is
             taken from ``gr['si03d']``.
         batch_size: Number of samples covered by each index range.
         shuffle: If true, visit the batches in a new random order on every
             pass. Only the batch order changes; samples are never moved
             between batches.
         once: If true, stop after one pass over the data; otherwise keep
             cycling for as long as the consumer asks for more.

     Yields:
         The object returned by ``graph_input_slice`` for each batch, with
         ``'s0'``/``'s1'`` removed and ``'se03d'``/``'se13d'`` filled in
         from ``self.emb.map_jset``.

     Any ``Exception`` raised while batching is printed with
     ``traceback.print_exc()`` and then ends the generator instead of
     propagating to the consumer.
     """
     try:
         # Integer ceiling division: exact for any size, no float round-trip.
         n_batches = (len(gr['si03d']) + batch_size - 1) // batch_size
         ids = list(range(n_batches))
         if not ids:
             # Nothing to yield. Without this guard the endless loop below
             # would spin forever without ever handing control back.
             return
         while True:
             if shuffle:
                 # XXX: We never swap samples between batches, does it matter?
                 random.shuffle(ids)
             for i in ids:
                 sl = slice(i * batch_size, (i + 1) * batch_size)
                 ogr = graph_input_slice(gr, sl)
                 # s0, s1 are larger than the rest, unnerving keras
                 ogr.pop('s0', None)
                 ogr.pop('s1', None)
                 ogr['se03d'] = self.emb.map_jset(ogr['sj03d'])
                 ogr['se13d'] = self.emb.map_jset(ogr['sj13d'])
                 yield ogr
             if once:
                 break
     except Exception:
         # NOTE(review): presumably printed here because the consumer would
         # otherwise hide the error -- confirm before changing to a re-raise.
         traceback.print_exc()
Exemplo n.º 2
0
 def sample_pairs(self, gr, batch_size, shuffle=True, once=False):
     """Generate batches by slicing ``gr`` into consecutive chunks.

     ``len(gr['si03d'])`` samples are divided into
     ``ceil(len / batch_size)`` index ranges. Every range is passed to
     ``graph_input_slice`` and the result is post-processed and yielded.

     Args:
         gr: Dataset container indexed by input name; ``gr['si03d']``
             determines how many samples there are.
         batch_size: Width of each index range.
         shuffle: Reshuffle the order in which batches are visited before
             each pass (batch membership itself is fixed).
         once: Finish after a single pass instead of looping indefinitely.

     Yields:
         One batch per index range: the ``graph_input_slice`` result with
         the ``'s0'`` and ``'s1'`` entries dropped and with ``'se03d'`` and
         ``'se13d'`` computed by ``self.emb.map_jset`` from ``'sj03d'`` and
         ``'sj13d'``.

     Exceptions derived from ``Exception`` are reported through
     ``traceback.print_exc()``; the generator then simply finishes.
     """
     try:
         total = len(gr['si03d'])
         # Exact integer ceiling; avoids going through float division.
         batch_count = (total + batch_size - 1) // batch_size
         if batch_count <= 0:
             # No batches at all: return instead of busy-looping forever
             # in the non-`once` case.
             return
         ids = list(range(batch_count))
         while True:
             if shuffle:
                 # XXX: We never swap samples between batches, does it matter?
                 random.shuffle(ids)
             for i in ids:
                 sl = slice(i * batch_size, (i + 1) * batch_size)
                 ogr = graph_input_slice(gr, sl)
                 # s0, s1 are larger than the rest, unnerving keras
                 ogr.pop('s0', None)
                 ogr.pop('s1', None)
                 ogr['se03d'] = self.emb.map_jset(ogr['sj03d'])
                 ogr['se13d'] = self.emb.map_jset(ogr['sj13d'])
                 yield ogr
             if once:
                 break
     except Exception:
         # Print and stop: the error does not reach the consumer.
         traceback.print_exc()
Exemplo n.º 3
0
 def sample_pairs(self, gr, batch_size, shuffle=True, once=False):
     """Yield ``(inputs, target)`` batches cut from the dataset ``gr``.

     The ``len(gr['si0'])`` samples are split into
     ``ceil(len / batch_size)`` consecutive index ranges, each turned into
     a batch by ``graph_input_slice``.

     Args:
         gr: Dataset container indexed by input name; the sample count is
             taken from ``gr['si0']``.
         batch_size: Number of samples covered by each index range.
         shuffle: If true, randomise the batch order before every pass.
             Samples are never exchanged between batches.
         once: If true, make a single pass; otherwise cycle endlessly.

     Yields:
         A 2-tuple ``(ogr, y)``. ``ogr`` is the ``graph_input_slice``
         result extended with ``'se0'``/``'se1'`` from
         ``self.emb.map_jset``; ``y`` is the ``'score'`` entry removed from
         it, or the ``'classes'`` entry when there is no ``'score'``.

     Any ``Exception`` raised along the way is printed via
     ``traceback.print_exc()`` and terminates the generator quietly.
     """
     try:
         # Integer ceiling division keeps the batch count exact.
         n_batches = (len(gr['si0']) + batch_size - 1) // batch_size
         ids = list(range(n_batches))
         if not ids:
             # An empty dataset would otherwise trap the non-`once` loop
             # below in an infinite spin that never yields.
             return
         while True:
             if shuffle:
                 # XXX: We never swap samples between batches, does it matter?
                 random.shuffle(ids)
             for i in ids:
                 sl = slice(i * batch_size, (i + 1) * batch_size)
                 ogr = graph_input_slice(gr, sl)
                 ogr['se0'] = self.emb.map_jset(ogr['sj0'])
                 ogr['se1'] = self.emb.map_jset(ogr['sj1'])
                 # The target is taken out of the inputs: 'score' when
                 # present, 'classes' otherwise.
                 target_key = 'score' if 'score' in ogr else 'classes'
                 y = ogr.pop(target_key)
                 yield (ogr, y)
             if once:
                 break
     except Exception:
         # Print and stop: the error does not reach the consumer.
         traceback.print_exc()
Exemplo n.º 4
0
def sample_pairs(gr, c, batch_size, once=False):
    """Yield padded batches of ``gr`` in a freshly shuffled order.

    The ``len(gr['si0'])`` samples are split into
    ``len // batch_size`` full-size index ranges; trailing samples that do
    not fill a whole batch are dropped. Each range is sliced out with
    ``graph_input_slice`` and passed through ``pad_graph`` before being
    yielded.

    Args:
        gr: Dataset container indexed by input name; the sample count is
            taken from ``gr['si0']``.
        c: Not used by this function; kept so existing callers keep
            working.
        batch_size: Number of samples per batch.
        once: If true, stop after one pass; otherwise keep cycling.

    Yields:
        The ``graph_input_slice`` result for each batch, after
        ``pad_graph`` has been applied to it.
    """
    # XXX: We drop the last few samples if len(gr['si0']) % batch_size != 0
    # XXX: We never swap samples between batches, does it matter?
    # A list (not a bare range) is required: random.shuffle mutates its
    # argument in place, which a Python 3 range object does not allow.
    ids = list(range(len(gr['si0']) // batch_size))
    if not ids:
        # Fewer samples than one batch: nothing can ever be yielded, so
        # return instead of spinning forever in the loop below.
        return
    while True:
        random.shuffle(ids)
        for i in ids:
            sl = slice(i * batch_size, (i + 1) * batch_size)
            ogr = graph_input_slice(gr, sl)
            # TODO: Add support for discarding too long samples?
            pad_graph(ogr)
            yield ogr
        if once:
            break
Exemplo n.º 5
0
 def sample_pairs(self, gr, batch_size, shuffle=True, once=False):
     """Yield padded, embedding-augmented batches cut from ``gr``.

     The ``len(gr['si0'])`` samples are divided into
     ``ceil(len / batch_size)`` consecutive index ranges. Each range is
     sliced out with ``graph_input_slice``, padded by ``pad_graph`` using
     ``self.s0pad``/``self.s1pad``, and extended with ``'se0'``/``'se1'``
     from ``self.emb.map_jset``.

     Args:
         gr: Dataset container indexed by input name; the sample count is
             taken from ``gr['si0']``.
         batch_size: Number of samples covered by each index range.
         shuffle: If true, randomise the batch order before every pass.
             Samples are never exchanged between batches.
         once: If true, make a single pass; otherwise cycle endlessly.

     Yields:
         The processed ``graph_input_slice`` result for each batch.

     Any ``Exception`` raised along the way is printed via
     ``traceback.print_exc()`` and ends the generator without propagating.
     """
     try:
         # Exact integer ceiling; avoids going through float division.
         n_batches = (len(gr['si0']) + batch_size - 1) // batch_size
         ids = list(range(n_batches))
         if not ids:
             # With no batches the non-`once` loop below would never yield
             # and never exit, so finish immediately instead.
             return
         while True:
             if shuffle:
                 # XXX: We never swap samples between batches, does it matter?
                 random.shuffle(ids)
             for i in ids:
                 sl = slice(i * batch_size, (i + 1) * batch_size)
                 ogr = graph_input_slice(gr, sl)
                 # TODO: Add support for discarding too long samples?
                 pad_graph(ogr, s0pad=self.s0pad, s1pad=self.s1pad)
                 ogr['se0'] = self.emb.map_jset(ogr['sj0'])
                 ogr['se1'] = self.emb.map_jset(ogr['sj1'])
                 yield ogr
             if once:
                 break
     except Exception:
         # Print and stop: the error does not reach the consumer.
         traceback.print_exc()