Пример #1
0
def get_fields(data_type):
    """
    Build the field dictionary for the given data type.

    Thin wrapper that forwards ``data_type`` unchanged to
    ``TextDataset.get_fields`` and returns its result.

    Args:
        data_type: concat / query / hier

    Returns:
        Whatever ``TextDataset.get_fields(data_type)`` returns; presumably
        a dictionary whose keys are strings and whose values are the
        corresponding Field objects (confirm against ``TextDataset``).
    """
    return TextDataset.get_fields(data_type)
Пример #2
0
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Build the field dictionary by delegating to ``TextDataset.get_fields``.

    Args:
        data_type: type of the source input. NOTE: this argument is not
            read by this function; ``TextDataset.get_fields`` is called
            whatever value is passed.
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.

    Returns:
        The result of ``TextDataset.get_fields(n_src_features,
        n_tgt_features)``; presumably a dictionary whose keys are strings
        and whose values are the corresponding Field objects (confirm
        against ``TextDataset``).
    """
    return TextDataset.get_fields(n_src_features, n_tgt_features)
Пример #3
0
def get_fields(data_type, n_src_features, n_tgt_features, n_ans_features):
    """
    Build the field dictionary for a text dataset.

    Args:
        data_type: type of the source input. Only ``'text'`` is accepted.
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.
        n_ans_features: the number of answer features, forwarded to
            ``TextDataset.get_fields`` as its third argument.

    Returns:
        The result of ``TextDataset.get_fields``; presumably a dictionary
        whose keys are strings and whose values are the corresponding
        Field objects (confirm against ``TextDataset``).

    Raises:
        ValueError: if ``data_type`` is anything other than ``'text'``.
    """
    # Guard clause: reject every unsupported input type up front.
    if data_type != 'text':
        raise ValueError("Data type not implemented")

    return TextDataset.get_fields(
        n_src_features, n_tgt_features, n_ans_features)
Пример #4
0
def load_fields_from_vocab(vocab, data_type="text"):
    """
    Rebuild the field dictionary from saved vocabularies.

    Args:
        vocab: anything ``dict()`` accepts that maps field names to
            vocabulary objects (presumably the contents of a `vocab.pt`
            file -- confirm against the caller).
        data_type: not read by this function; ``TextDataset.get_fields``
            is always used.

    Returns:
        The dictionary returned by ``TextDataset.get_fields``, with the
        ``vocab`` attribute of each named field set to the matching
        vocabulary object.
    """
    vocab_by_name = dict(vocab)

    # Count the feature fields per side, in src / tgt / ans order.
    feature_counts = [
        len(collect_features(vocab_by_name, side))
        for side in ('src', 'tgt', 'ans')
    ]
    fields = TextDataset.get_fields(*feature_counts)

    for name, field_vocab in vocab_by_name.items():
        # Hack. Can't pickle defaultdict :( -- so the default-to-0 lookup
        # is re-installed on ``stoi`` here before attaching the vocabulary.
        field_vocab.stoi = defaultdict(lambda: 0, field_vocab.stoi)
        fields[name].vocab = field_vocab
    return fields