Пример #1
0
def load_set(files, vocab=None, skip_unlabeled=True, spad=spad):
    """Load one or more dataset files and prepare the model inputs.

    Args:
        files: Either a single filename (a string) or an iterable of
            filenames.  A filename containing 'sick2014' is read with
            loader.load_sick2014(); any other one with loader.load_sts().
        vocab: Object providing vectorize(); when None, a new Vocabulary
            is built from ``s0 + s1``.
        skip_unlabeled: Forwarded to loader.load_sts() only.
        spad: Forwarded to vocab.vectorize() and (twice) to
            nlp.sentence_flags().  Presumably the sentence padding
            length -- TODO confirm.

    Returns:
        The tuple ``(s0, s1, y, vocab, gr)``, where ``s0, s1, y`` are the
        three values produced by the loader and ``gr`` is whatever
        graph_input_sts() returns for them.
    """
    def _read(path):
        # XXX: ugly logic -- the reader is picked by a filename substring
        if 'sick2014' not in path:
            return loader.load_sts(path, skip_unlabeled=skip_unlabeled)
        return loader.load_sick2014(path)

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if not isinstance(files, string_types):
        # Several files: load each one and merge the results.
        s0, s1, y = loader.concat_datasets([_read(path) for path in files])
    else:
        # A lone filename is loaded directly, without concatenation.
        s0, s1, y = _read(files)

    if vocab is None:
        vocab = Vocabulary(s0 + s1)

    si0, si1 = [vocab.vectorize(sents, spad=spad) for sents in (s0, s1)]
    f0, f1 = nlp.sentence_flags(s0, s1, spad, spad)

    return (s0, s1, y, vocab, graph_input_sts(si0, si1, y, f0, f1, s0, s1))
Пример #2
0
def load_set(files, vocab=None, skip_unlabeled=True):
    """Load one or more dataset files and prepare the model inputs.

    Args:
        files: An iterable of filenames, or a single filename given as a
            string.  A filename containing 'sick2014' is read with
            loader.load_sick2014(); any other one with loader.load_sts().
        vocab: Object providing vectorize(); when None, a new Vocabulary
            is built from ``s0 + s1``.
        skip_unlabeled: Forwarded to loader.load_sts() only.

    Returns:
        The tuple ``(s0, s1, y, vocab, gr)``, where ``s0, s1, y`` come from
        loader.concat_datasets() and ``gr`` is whatever graph_input_sts()
        returns for the vectorized inputs.

    Note:
        ``spad`` is not a parameter here; it is read from the enclosing
        (module) scope.
    """
    def load_file(fname, skip_unlabeled=True):
        # XXX: ugly logic -- the reader is picked by a filename substring
        if 'sick2014' in fname:
            return loader.load_sick2014(fname)
        else:
            return loader.load_sts(fname, skip_unlabeled=skip_unlabeled)

    # A bare filename is itself iterable, so without this guard every
    # single character would be treated as a separate path.  Wrap it in a
    # list instead.  ``basestring`` only exists on Python 2.
    try:
        strtype = basestring
    except NameError:
        strtype = str
    if isinstance(files, strtype):
        files = [files]

    s0, s1, y = loader.concat_datasets(
        [load_file(d, skip_unlabeled=skip_unlabeled) for d in files])

    if vocab is None:
        vocab = Vocabulary(s0 + s1)

    si0 = vocab.vectorize(s0, spad=spad)
    si1 = vocab.vectorize(s1, spad=spad)
    f0, f1 = nlp.sentence_flags(s0, s1, spad, spad)
    gr = graph_input_sts(si0, si1, y, f0, f1)

    return (s0, s1, y, vocab, gr)
Пример #3
0
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Load every file matching ``globmask`` and return embedded inputs.

    Args:
        glove: Passed through to loader.load_embedded(); presumably the
            word-embedding model -- TODO confirm.
        globmask: Glob pattern expanded with glob.glob().
        loadfun: Callable applied to each matching path; defaults to
            loader.load_sts.

    Returns:
        The tuple ``([e0, e1], labels)`` built from the first two and the
        last of the five values returned by loader.load_embedded().
    """
    datasets = []
    for path in glob.glob(globmask):
        datasets.append(loadfun(path))

    s0, s1, labels = loader.concat_datasets(datasets)
    print('(%s) Loaded dataset: %d' % (globmask, len(s0)))

    # Only the embeddings and labels are returned; the two sentence
    # values handed back by load_embedded() are not used.
    e0, e1, _, _, labels = loader.load_embedded(glove, s0, s1, labels)
    return ([e0, e1], labels)
Пример #4
0
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Read all files matched by ``globmask`` and embed them.

    Args:
        glove: Forwarded unchanged to loader.load_embedded(); presumably
            the embedding source -- TODO confirm.
        globmask: Pattern handed to glob.glob() to find the input files.
        loadfun: Per-file loader; loader.load_sts unless overridden.

    Returns:
        ``([e0, e1], labels)``, taken from the result of
        loader.load_embedded().
    """
    paths = glob.glob(globmask)
    loaded = [loadfun(path) for path in paths]
    sents0, sents1, labels = loader.concat_datasets(loaded)

    # Progress message: the pattern and the length of the first value.
    print('(%s) Loaded dataset: %d' % (globmask, len(sents0)))

    embedded = loader.load_embedded(glove, sents0, sents1, labels)
    e0, e1, sents0, sents1, labels = embedded
    return ([e0, e1], labels)