def load_set(files, vocab=None, skip_unlabeled=True, spad=spad):
    """Load one dataset file (or concatenate several) into sentence pairs.

    `files` may be a single filename or an iterable of filenames.  Returns
    a tuple (s0, s1, y, vocab, gr) where gr is the model graph input built
    by graph_input_sts().  A fresh Vocabulary is built from both sentence
    sides unless one is passed in.
    """
    def read_one(fname, skip_unlabeled=True):
        # XXX: ugly logic
        # SICK2014 files get their dedicated loader; everything else
        # is treated as STS-format.
        if 'sick2014' in fname:
            return loader.load_sick2014(fname)
        return loader.load_sts(fname, skip_unlabeled=skip_unlabeled)

    # Python 2/3 compatibility: basestring only exists on py2.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(files, string_types):
        s0, s1, y = read_one(files, skip_unlabeled=skip_unlabeled)
    else:
        parts = [read_one(f, skip_unlabeled=skip_unlabeled) for f in files]
        s0, s1, y = loader.concat_datasets(parts)

    if vocab is None:
        vocab = Vocabulary(s0 + s1)

    si0 = vocab.vectorize(s0, spad=spad)
    si1 = vocab.vectorize(s1, spad=spad)
    f0, f1 = nlp.sentence_flags(s0, s1, spad, spad)
    gr = graph_input_sts(si0, si1, y, f0, f1, s0, s1)
    return (s0, s1, y, vocab, gr)
def load_set(files, vocab=None, skip_unlabeled=True):
    """Load and concatenate the given dataset files into sentence pairs.

    Returns (s0, s1, y, vocab, gr); gr is the graph input produced by
    graph_input_sts().  Builds a new Vocabulary from both sentence sides
    when none is supplied.  Uses the module-level `spad` for padding.
    """
    def read_one(fname, skip_unlabeled=True):
        # XXX: ugly logic
        # SICK2014 files get their dedicated loader; everything else
        # is treated as STS-format.
        if 'sick2014' in fname:
            return loader.load_sick2014(fname)
        return loader.load_sts(fname, skip_unlabeled=skip_unlabeled)

    parts = [read_one(f, skip_unlabeled=skip_unlabeled) for f in files]
    s0, s1, y = loader.concat_datasets(parts)

    if vocab is None:
        vocab = Vocabulary(s0 + s1)

    si0 = vocab.vectorize(s0, spad=spad)
    si1 = vocab.vectorize(s1, spad=spad)
    f0, f1 = nlp.sentence_flags(s0, s1, spad, spad)
    gr = graph_input_sts(si0, si1, y, f0, f1)
    return (s0, s1, y, vocab, gr)
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Load every dataset file matching `globmask`, embed with `glove`.

    Returns ([e0, e1], labels) — the embedded sentence pair arrays plus
    their labels, ready to feed the model.
    """
    datasets = []
    for path in glob.glob(globmask):
        datasets.append(loadfun(path))
    s0, s1, labels = loader.concat_datasets(datasets)
    print('(%s) Loaded dataset: %d' % (globmask, len(s0)))
    # load_embedded rebinds s0/s1/labels alongside the embeddings
    e0, e1, s0, s1, labels = loader.load_embedded(glove, s0, s1, labels)
    return ([e0, e1], labels)
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Glob for dataset files, concatenate them and embed via `glove`.

    Returns ([e0, e1], labels) for direct consumption by the model.
    """
    loaded = [loadfun(fname) for fname in glob.glob(globmask)]
    s0, s1, labels = loader.concat_datasets(loaded)
    print('(%s) Loaded dataset: %d' % (globmask, len(s0)))
    # load_embedded rebinds s0/s1/labels alongside the embeddings
    e0, e1, s0, s1, labels = loader.load_embedded(glove, s0, s1, labels)
    return ([e0, e1], labels)