def text_to_instance(self, sample) -> Instance:
    fields: Dict[str, Field] = {}
    tailored_history = sample['history']
    tailored_tags = sample['tags'][-10:]
    context = '。'.join(tailored_history)
    # Keep the last 510 characters so that, with [CLS] and [SEP] added by the
    # tokenizer, the sequence fits BERT's 512-token limit.
    text_tokens = self._tokenizer.tokenize(context[-510:])
    fields['text'] = TextField(text_tokens, self._token_indexers)
    fields_list = []
    for sen in tailored_history:
        sen = ' '.join(sen)
        txt_token = self._tokenizer.tokenize(sen)
        fields_list.append(TextField(txt_token, self._token_indexers))
    fields["label"] = MultiLabelField(list(sample['next_symp']),
                                      skip_indexing=True, num_labels=sym_size)
    # fields['symptoms'] = MultiLabelField(list(sample['his_symp']), skip_indexing=True, num_labels=sym_size)
    # fields['tags'] = MetadataField(tailored_tags)
    # fields['history'] = ListField(fields_list)
    fields["future"] = MultiLabelField(list(sample['future_symp']),
                                       skip_indexing=True, num_labels=sym_size)
    return Instance(fields)
def test_as_tensor_returns_integer_tensor(self):
    f = MultiLabelField([2, 3], skip_indexing=True, label_namespace="test1", num_labels=5)
    tensor = f.as_tensor(f.get_padding_lengths()).data.cpu().numpy()
    numpy.testing.assert_array_almost_equal(tensor, numpy.array([0, 0, 1, 1, 0]))
def test_as_tensor_returns_integer_tensor(self):
    f = MultiLabelField([2, 3], skip_indexing=True, label_namespace="test1", num_labels=5)
    tensor = f.as_tensor(f.get_padding_lengths()).detach().cpu().tolist()
    assert tensor == [0, 0, 1, 1, 0]
    assert {type(item) for item in tensor} == {int}
def test_multilabel_field_can_index_with_vocab(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("rel0", namespace="rel_labels")
    vocab.add_token_to_namespace("rel1", namespace="rel_labels")
    vocab.add_token_to_namespace("rel2", namespace="rel_labels")
    f = MultiLabelField(["rel1", "rel0"], label_namespace="rel_labels")
    f.index(vocab)
    tensor = f.as_tensor(f.get_padding_lengths()).detach().cpu().numpy()
    numpy.testing.assert_array_almost_equal(tensor, numpy.array([1, 1, 0]))
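# The tests above cover the whole MultiLabelField lifecycle: construct with
# string labels, index against a Vocabulary, then convert to a multi-hot
# tensor. A minimal standalone sketch (namespace and label names are invented):
from allennlp.data import Vocabulary
from allennlp.data.fields import MultiLabelField

vocab = Vocabulary()
for label in ("sports", "politics", "science"):
    vocab.add_token_to_namespace(label, namespace="topic_labels")

field = MultiLabelField(["sports", "science"], label_namespace="topic_labels")
field.index(vocab)  # string labels must be indexed before as_tensor()
print(field.as_tensor(field.get_padding_lengths()))  # tensor([1, 0, 1])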
def text_to_instance(self, tokens: List[Token], text: str, ID: Optional[str] = None,
                     labels: Optional[List[str]] = None) -> Instance:
    '''
    The tokens are expected to be pre-tokenised.

    :param tokens: The text that has been tokenised
    :param text: The text from the sample
    :param ID: The ID of the sample
    :param labels: A list of labels (an empty list is implicitly associated
                   with the neutral class)
    :returns: An Instance object with all of the above encoded for a PyTorch model.
    '''
    token_sequence = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {'tokens': token_sequence}
    meta_fields = {}
    meta_fields["words"] = [x.text for x in tokens]
    meta_fields["text"] = text
    if ID is not None:
        meta_fields["ID"] = ID
    instance_fields["metadata"] = MetadataField(meta_fields)
    if labels is not None:
        instance_fields['labels'] = MultiLabelField(labels, label_namespace="labels")
    return Instance(instance_fields)
def text_to_instance(self, index: int, field_type: str):  # type: ignore
    field = TextField(
        [Token(t) for t in ["The", "number", "is", str(index), "."]],
        token_indexers={"words": SingleIdTokenIndexer("words")},
    )
    return Instance(
        {
            "text": field,
            "label": LabelField(index, skip_indexing=True),
            "flag": FlagField(23),
            "index": IndexField(index % self.batch_size, field),
            "metadata": MetadataField({"some_key": "This will not be logged as a histogram."}),
            "adjacency": AdjacencyField([(0, 1), (1, 2)], field),
            "multilabel": MultiLabelField(["l1", "l2"]),
            "span": SpanField(2, 3, field),
            "tensor": TensorField(torch.randn(2, 3)),
        }
    )
def text_to_instance(
        self,
        context_tokens: List[Token],
        tokens: List[Token],
        tags: List[str] = None,
        intents: List[str] = None,
        dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
    fields["tokens"] = TextField(tokens, self._token_indexers)
    if tags is not None:
        fields["tags"] = SequenceLabelField(tags, fields["tokens"])
    if intents is not None:
        fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
    fields["metadata"] = MetadataField({
        "words": [x.text for x in tokens],
        "dialog_act": dialog_act if dialog_act is not None else {},
    })
    return Instance(fields)
def test_class_variables_for_namespace_warnings_work_correctly(self):
    assert "text" not in MultiLabelField._already_warned_namespaces
    with self.assertLogs(logger="allennlp.data.fields.multilabel_field", level="WARNING"):
        _ = MultiLabelField(["test"], label_namespace="text")

    # We've warned once, so the namespace should now be recorded in the class
    # variable and a second use of it should not log again.
    assert "text" in MultiLabelField._already_warned_namespaces
    with pytest.raises(AssertionError):
        with self.assertLogs(logger="allennlp.data.fields.multilabel_field", level="WARNING"):
            _ = MultiLabelField(["test2"], label_namespace="text")

    # ... but a new namespace should still log a warning.
    assert "text2" not in MultiLabelField._already_warned_namespaces
    with self.assertLogs(logger="allennlp.data.fields.multilabel_field", level="WARNING"):
        _ = MultiLabelField(["test"], label_namespace="text2")
def add_label(
    self,
    instance: Instance,
    label: Union[List[str], List[int], str, int],
    to_field: str = "label",
) -> Optional[Instance]:
    """Includes the label field for classification into the instance data"""
    # "if not label:" fails for ndarrays, which is why we explicitly check for None
    if label is None:
        return instance

    field = None
    # check if multilabel and if adequate type
    if self._multilabel and isinstance(label, (list, numpy.ndarray)):
        label = label.tolist() if isinstance(label, numpy.ndarray) else label
        field = MultiLabelField(label, label_namespace=vocabulary.LABELS_NAMESPACE)
    # check if not multilabel and adequate type + check for empty strings
    if not self._multilabel and isinstance(label, (str, int)) and label:
        field = LabelField(label, label_namespace=vocabulary.LABELS_NAMESPACE)
    if field is None:
        # We have label info but we cannot build the label field --> discard the instance
        return None

    instance.add_field(to_field, field)
    return instance
def index(self, ner_tags: List[str], as_label_field: bool) -> Union[List[int], MultiLabelField]:
    """
    Takes a list of tags ([B-PER, I-PER, O, O, B-LOC, I-LOC]), performs a regex
    match against each NER tag (.*-TAG), and generates the label(s) accordingly.

    Arguments:
        ner_tags (List[str]): The list of NER tags
        as_label_field (bool): If True, returns a MultiLabelField,
            otherwise returns a list of tag indices

    Returns:
        indices (Union[List[int], MultiLabelField]): Either a list of indexed
            labels or a MultiLabelField instance
    """
    indices = set()
    for gold_tag in ner_tags:
        for tag in self.tags2ix:
            if re.match(f".*-{tag}", gold_tag) is not None:
                indices.add(self.tags2ix[tag])
    if len(indices) > 0:
        indices = list(indices)
    else:
        # No entity found: fall back to a dedicated "no entity" index.
        indices = [len(self.tags2ix)]
    if as_label_field:
        indices = MultiLabelField(labels=indices,
                                  label_namespace=self.label_namespace,
                                  skip_indexing=True,
                                  num_labels=self.get_num_tags())
    return indices
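# For concreteness, here is what the indexing logic above computes on a toy
# tag map; tags2ix and the tag list are invented for illustration, and the
# "no entity" sentinel is len(tags2ix) as in the method:
import re

tags2ix = {"PER": 0, "LOC": 1, "ORG": 2}
ner_tags = ["B-PER", "I-PER", "O", "O", "B-LOC", "I-LOC"]

indices = set()
for gold_tag in ner_tags:
    for tag in tags2ix:
        if re.match(f".*-{tag}", gold_tag) is not None:
            indices.add(tags2ix[tag])

print(sorted(indices))  # [0, 1] -> PER and LOC; an all-"O" input would yield [3]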
def text_to_instance(self,
                     tokens: List[str],
                     features: List[List[str]],
                     tags: Optional[List[str]] = None,
                     tag_label_namespace: Optional[str] = None):
    # pylint: disable=arguments-differ
    tokens: List[Token] = [Token(x) for x in tokens]
    sequence = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {"tokens": sequence}
    metadata = {"words": [x.text for x in tokens]}
    if self._use_sentence_markers:
        sentence_markers = get_sentence_markers_from_tokens(tokens)
        metadata["sentence_markers"] = sentence_markers
    instance_fields["metadata"] = MetadataField(metadata)

    # now encode the handcrafted features, one MultiLabelField per token
    feature_list: List[MultiLabelField] = []
    for feature in features:
        indexed_feature: List[int] = [
            self._features_index_map[x]
            for x in feature
            if x in self._features_index_map
        ]
        feature_list.append(
            MultiLabelField(indexed_feature,
                            label_namespace=self.feature_label_namespace,
                            skip_indexing=True,
                            num_labels=len(self._features_index_map)))
    instance_fields["features"] = ListField(feature_list)

    if tags:
        tag_label_namespace = tag_label_namespace or self.label_namespace
        converted_tags: List[str] = self.convert_tags(tags)
        instance_fields["tags"] = SequenceLabelField(converted_tags, sequence, tag_label_namespace)
    return Instance(instance_fields)
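# A self-contained sketch of the per-token feature encoding used above: each
# token gets its own multi-hot MultiLabelField, and the fields are stacked in
# a ListField. The feature map and feature lists here are made up:
from allennlp.data.fields import ListField, MultiLabelField

features_index_map = {"capitalized": 0, "numeric": 1, "punct": 2}
features = [["capitalized"], [], ["numeric", "punct"]]  # one list per token

feature_list = [
    MultiLabelField(
        [features_index_map[f] for f in token_features],
        label_namespace="feature_labels",
        skip_indexing=True,
        num_labels=len(features_index_map),
    )
    for token_features in features
]
print(ListField(feature_list))  # three 3-way multi-hot fields, one per token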
def _build_instance(self,
                    tokens: List[Token],
                    trigger_labels: List[str] = None,
                    trigger_token_seqs: Dict[Tuple[int, int], str] = None,
                    **metadata) -> Instance:
    if len(tokens) < 3 and self._filter is True:
        return None

    # Translate sentence if translation_service is provided
    if self._translation_service is not None:
        source_snt = self._detokenize(tokens)
        target_snt = self._translation_service(source_snt)
        tokens = self._tokenize(target_snt)

    fields: Dict[str, Field] = dict()

    # First, populate fields with provided metadata
    for key, value in metadata.items():
        fields[key] = MetadataField(value)
    if trigger_token_seqs is not None:
        fields['trigger_token_seqs'] = MetadataField(trigger_token_seqs)

    # Building different discrete representations for text embedders.
    text_field = TextField(tokens, self._token_indexers)
    fields['text'] = text_field
    # Additionally, raw tokens are also stored for reverse mapping
    fields['tokens'] = MetadataField(tokens)

    # Build an Instance without annotations to use in inference phase.
    if trigger_labels is None:
        return Instance(fields)

    if self._translation_service is None:
        # If the sentence is translated we have no alignments for
        # token-level labels, so we skip them.
        if len(trigger_labels) > len(tokens):
            truncate_len = len(tokens)
            trigger_labels = trigger_labels[:truncate_len]
            logger.warning('Truncated tokens detected. Truncating labels as well.')

        # Token-level trigger labels
        trigger_labels_field = SequenceLabelField(trigger_labels, text_field,
                                                  self._trigger_label_namespace)
        if not self._sentence_level_only:
            fields['trigger_labels'] = trigger_labels_field

    # Sentence-level trigger label(s): the set of non-'O' token tags.
    token_tags = set(trigger_labels)
    sentence_trigger_labels = [tag for tag in token_tags if tag != 'O']
    if not sentence_trigger_labels and self._null_label:
        sentence_trigger_labels = ['O']
    fields['sentence_trigger_labels'] = MultiLabelField(sentence_trigger_labels,
                                                        self._trigger_label_namespace)
    return Instance(fields)
def test_class_variables_for_namespace_warnings_work_correctly(self, caplog):
    with caplog.at_level(logging.WARNING, logger="allennlp.data.fields.multilabel_field"):
        assert "text" not in MultiLabelField._already_warned_namespaces
        _ = MultiLabelField(["test"], label_namespace="text")
        assert caplog.records

        # We've warned once, so the namespace is recorded in the class
        # variable and a second use of it should not log again.
        assert "text" in MultiLabelField._already_warned_namespaces
        caplog.clear()
        _ = MultiLabelField(["test2"], label_namespace="text")
        assert not caplog.records

        # ... but a new namespace should still log a warning.
        assert "text2" not in MultiLabelField._already_warned_namespaces
        caplog.clear()
        _ = MultiLabelField(["test"], label_namespace="text2")
        assert caplog.records
def text_to_instance(self, sentence: str, category_tag: str) -> Instance:
    categories = self.category_mapping.get(category_tag)
    tokenized_sentence = self.tokenizer.tokenize(sentence)
    sent_field = TextField(tokenized_sentence, self.token_indexers)
    return Instance({
        'sentences': sent_field,
        'categories': MultiLabelField(categories)
    })
def test_multilabel_field_empty_field_works(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("label1", namespace="test_empty_labels")
    vocab.add_token_to_namespace("label2", namespace="test_empty_labels")
    f = MultiLabelField([], label_namespace="test_empty_labels")
    f.index(vocab)
    tensor = f.as_tensor(f.get_padding_lengths()).detach().cpu().numpy()
    numpy.testing.assert_array_almost_equal(tensor, numpy.array([0, 0]))

    g = f.empty_field()
    g.index(vocab)
    tensor = g.as_tensor(g.get_padding_lengths()).detach().cpu().numpy()
    numpy.testing.assert_array_almost_equal(tensor, numpy.array([0, 0]))

    h = MultiLabelField(
        [0, 0, 1], label_namespace="test_empty_labels", num_labels=3, skip_indexing=True
    )
    tensor = h.empty_field().as_tensor(None).detach().cpu().numpy()
    numpy.testing.assert_array_almost_equal(tensor, numpy.array([0, 0, 0]))
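# The test above relies on empty_field() producing an all-zeros vector of the
# same width, which is what makes MultiLabelField safe as padding inside a
# ListField. A quick standalone check of that behavior:
from allennlp.data.fields import MultiLabelField

f = MultiLabelField([0, 2], skip_indexing=True, num_labels=3)
print(f.as_tensor(f.get_padding_lengths()))  # tensor([1, 0, 1])
print(f.empty_field().as_tensor(None))       # tensor([0, 0, 0])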
def text_to_instance(self, text: str, labels: List[str] = None) -> Instance:
    tokenized_text = self._tokenizer.tokenize(text)
    text_field = TextField(tokenized_text, self._token_indexers)
    fields = {'text': text_field}
    if labels:
        fields["labels"] = MultiLabelField(labels=labels)
    return Instance(fields)
def modify_batch_instances(self, batch_instances):
    batch_instances = list(batch_instances)
    batch_size = len(batch_instances)
    negative_token_contexts = self.get_negative_contexts(batch_size)
    token_namespace = 'token_context'
    for instance, negs in zip(batch_instances, negative_token_contexts):
        instance.add_field(
            'negative_context',
            MultiLabelField(negs, label_namespace=token_namespace))
    return batch_instances
def text_to_instance(self, sample) -> Instance:
    fields: Dict[str, Field] = {}
    sen_num = self.pre_sen
    context = ' '.join(sample['history'][-sen_num:])
    all_sentence = sample['history'][-sen_num:]
    # Word-segment the concatenated history before tokenizing.
    history = ' '.join(self.seg.cut(context))
    text_tokens = self._source_tokenizer.tokenize(history)
    text_tokens = text_tokens[-self._source_max_tokens:]
    text_tokens.insert(0, Token(START_SYMBOL))
    text_tokens.append(Token(END_SYMBOL))

    response = ' '.join(self.seg.cut(sample['response']))
    response_tokens = self._target_tokenizer.tokenize(response)
    response_tokens = response_tokens[:self._target_max_tokens]
    response_tokens.insert(0, Token(START_SYMBOL))
    response_tokens.append(Token(END_SYMBOL))

    fields_list = []
    for sen in all_sentence:
        sen = ' '.join(self.seg.cut(sen))
        txt_token = self._source_tokenizer.tokenize(sen)
        fields_list.append(TextField(txt_token, self._source_token_indexers))

    fields['source_tokens'] = TextField(text_tokens, self._source_token_indexers)
    fields["next_sym"] = MultiLabelField(list(sample['next_symp']),
                                         skip_indexing=True,
                                         num_labels=total_entity + sen_num)
    fields['target_tokens'] = TextField(response_tokens, self._target_token_indexers)
    fields['his_symptoms'] = MultiLabelField(list(sample['his_symp']),
                                             skip_indexing=True,
                                             num_labels=total_entity + sen_num)
    fields['tags'] = MetadataField(sample['tags'][-sen_num:])
    fields['history'] = ListField(fields_list)
    return Instance(fields)
def text_to_instance(
    self,
    sentences: List[str],
    labels: List[str] = None,
    confidences: List[float] = None,
    additional_features: List[float] = None,
) -> Instance:
    if not self.predict:
        assert len(sentences) == len(labels)
    if confidences is not None:
        assert len(sentences) == len(confidences)
    if additional_features is not None:
        assert len(sentences) == len(additional_features)

    if self.use_sep:
        # Join all sentences into one sequence, separated by [SEP] tokens.
        tokenized_sentences = [
            self._tokenizer.tokenize(s)[:self.sent_max_len] + [Token("[SEP]")]
            for s in sentences
        ]
        sentences = [list(itertools.chain.from_iterable(tokenized_sentences))[:-1]]
    else:
        # Tokenize the sentences individually.
        sentences = [
            self._tokenizer.tokenize(sentence_text)[:self.sent_max_len]
            for sentence_text in sentences
        ]

    fields: Dict[str, Field] = {}
    fields["sentences"] = ListField(
        [TextField(sentence, self._token_indexers) for sentence in sentences]
    )

    if labels is not None:
        if isinstance(labels[0], list):
            fields["labels"] = ListField([MultiLabelField(label) for label in labels])
        elif self.sci_sum:
            fields["labels"] = ArrayField(np.array(labels))
        else:
            # make the labels strings for easier identification of the neutral
            # label; probably not strictly necessary
            fields["labels"] = ListField(
                [LabelField(str(label) + "_label") for label in labels]
            )

    if confidences is not None:
        fields['confidences'] = ArrayField(np.array(confidences))
    if additional_features is not None:
        fields["additional_features"] = ArrayField(np.array(additional_features))

    return Instance(fields)
def _add_label(
    self,
    instance: Instance,
    label: Union[List[str], List[int], str, int],
    to_field: str = "label",
) -> Instance:
    """Adds the label field for classification into the instance data

    Helper function for the child's `self.featurize` method.

    Parameters
    ----------
    instance
        Add a label field to this instance
    label
        The label data
    to_field
        Namespace of the field

    Returns
    -------
    instance
        If `label` is not None, return `instance` with a label field added.
        Otherwise return just the given `instance`.

    Raises
    ------
    FeaturizeError
        If the label is an empty string or does not match the type:
        - (str, int) for single label
        - (list, np.array) for multi label
    """
    # "if not label:" fails for ndarrays, this is why we explicitly check for None
    if label is None:
        return instance

    field = None
    # check if multilabel and if adequate type
    if self._multilabel and isinstance(label, (list, numpy.ndarray)):
        label = label.tolist() if isinstance(label, numpy.ndarray) else label
        field = MultiLabelField(label, label_namespace=vocabulary.LABELS_NAMESPACE)
    # check if not multilabel and adequate type + check for empty strings
    if not self._multilabel and isinstance(label, (str, int)) and label:
        field = LabelField(label, label_namespace=vocabulary.LABELS_NAMESPACE)
    if field is None:
        # We have label info but we cannot build the label field --> discard the instance
        raise FeaturizeError(f"Cannot create label field for `label={label}`!")

    instance.add_field(to_field, field)
    return instance
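# A sketch of how the two branches above dispatch, with the project-specific
# vocabulary.LABELS_NAMESPACE constant replaced by a literal string:
from allennlp.data.fields import LabelField, MultiLabelField

LABELS_NAMESPACE = "labels"  # stand-in for vocabulary.LABELS_NAMESPACE

# Single-label case: a str or int becomes a LabelField.
single = LabelField("positive", label_namespace=LABELS_NAMESPACE)
# Multi-label case: a list (or an ndarray converted via .tolist()) becomes a
# MultiLabelField, which may hold any number of labels, including zero.
multi = MultiLabelField(["sports", "politics"], label_namespace=LABELS_NAMESPACE)
print(single)
print(multi)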
def text_to_instance(self, text: str, label: str = None) -> Instance:  # type: ignore
    # pylint: disable=arguments-differ
    tokenized = self._tokenizer.tokenize(text)
    if not (text and tokenized):
        return None
    fields = {'tokens': TextField(tokenized, self._token_indexers)}
    if label is not None:
        # The raw label is a comma-separated string of one or more labels.
        label = label.split(', ')
        fields['label'] = MultiLabelField(label)
    return Instance(fields)
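# The split-on-', ' convention above turns a CSV-style annotation string into
# several labels; a toy illustration with an invented label string:
from allennlp.data.fields import MultiLabelField

raw = "economy, politics, health"
labels = raw.split(', ')         # ['economy', 'politics', 'health']
print(MultiLabelField(labels))   # one field carrying all three labels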
def text_to_instance(self,
                     tokens: List[Token],
                     labels: Sequence[int] = None,
                     labels_aspect: Sequence[int] = None,
                     domain: str = None) -> Instance:
    # sentence and indexer
    sentence_field = TextField(tokens, self.token_indexers)
    fields = {'sentence': sentence_field}
    if domain is not None:
        fields['domain'] = LabelField(label=DOMAIN.index(domain),
                                      label_namespace='domain-labels',
                                      skip_indexing=True)
        in_domain = domain == self.target_domain
        fields['sample_weight'] = ArrayField(
            np.array([1.0 if in_domain else self.out_domain_weight]))
    if labels:
        fields['labels'] = MultiLabelField(labels=labels,
                                           label_namespace='motive-labels',
                                           skip_indexing=True,
                                           num_labels=NUM_MOTIVES + 1)
    if self.multitask and labels_aspect:
        num_aspects = max(c.shape[0] for c in self.label_counts_aspect.values())
        fields['labels_aspect'] = MultiLabelField(labels=labels_aspect,
                                                  label_namespace='aspect-labels',
                                                  skip_indexing=True,
                                                  num_labels=num_aspects)
    fields['metadata'] = MetadataField({
        'label_prior': self.label_prior,
        'label_prior_aspect': self.label_prior_aspect[domain]
    })
    return Instance(fields)
def text_to_instance(self, sample) -> Instance:
    # The arrow annotation declares the return type of this method.
    fields: Dict[str, Field] = {}
    tailored_history = sample['history']
    context = '。'.join(tailored_history)
    # Keep the last 510 characters so the sequence fits BERT's 512-token limit.
    text_tokens = self._tokenizer.tokenize(context[-510:])
    fields['text'] = TextField(text_tokens, self._token_indexers)
    fields_list = []  # per-sentence fields (built but currently unused)
    for sen in tailored_history:
        sen = ' '.join(sen)
        txt_token = self._tokenizer.tokenize(sen)
        fields_list.append(TextField(txt_token, self._token_indexers))
    fields["label"] = MultiLabelField(list(sample['next_sym']),
                                      skip_indexing=True, num_labels=sym_size)
    fields["future"] = MultiLabelField(list(sample['future']),
                                       skip_indexing=True, num_labels=sym_size)
    return Instance(fields)
def text_to_instance(self, graf_tokens: List[Token], labels: List[str] = None) -> Instance:
    graf_field = TextField(graf_tokens, self.token_indexers)
    metadata = MetadataField({"graf_words": graf_tokens})
    fields = {"graf": graf_field, "metadata": metadata}
    if labels is not None:
        fields["label"] = MultiLabelField(labels)
    return Instance(fields)
def get_new_instance(instance: Instance, tags: List[str], reader: DatasetReader) -> Instance:
    # first copy over the tokens
    new_instance: Dict[str, Field] = {}
    tokens = instance.fields["tokens"].tokens
    sequence = TextField(tokens, reader._token_indexers)
    new_instance["tokens"] = sequence
    # now copy the tags
    new_instance["tags"] = SequenceLabelField(tags, sequence, reader.label_namespace)
    # now copy the handcrafted features
    feature_list: List[MultiLabelField] = []
    for feature in instance.fields["features"]:
        labels: List[int] = feature.labels
        feature_list.append(
            MultiLabelField(labels,
                            label_namespace=reader.feature_label_namespace,
                            skip_indexing=True,
                            num_labels=len(reader._features_index_map)))
    new_instance["features"] = ListField(feature_list)
    return Instance(new_instance)
def text_to_instance(self, sentences: List[str], category_tag: str = None) -> Instance:
    sentence_fields = []
    for sentence in sentences:
        sent_field = LazyTextFiled(text=sentence,
                                   tokenizer_name="sentences",
                                   token_indexers=self.token_indexers)
        sentence_fields.append(sent_field)
    dt = {
        'sentences': ListField(sentence_fields),
    }
    if category_tag is not None:
        categories = self.category_mapping.get(category_tag)
        dt['categories'] = MultiLabelField(categories)
    return Instance(dt)
def text_to_instance(self, text: Union[str, List[Token]], label: str = None) -> Instance:
    if isinstance(text, str):
        tokenized_text = self.tokenize(text)
    else:
        tokenized_text = text
    if len(tokenized_text) > self._max_length:
        tokenized_text = tokenized_text[:self._max_length]
    text_field = TextField(tokenized_text, self._token_indexers)
    fields = {'text': text_field}
    if label is not None:
        if self._multi_label:
            # Note: despite the `label: str` annotation, the multi-label case
            # expects a sequence of labels here.
            fields['label'] = MultiLabelField(label)
        else:
            fields['label'] = LabelField(label)
    return Instance(fields)
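# One subtlety in the multi-label branch above: MultiLabelField treats its
# argument as a sequence, so a bare string would be indexed character by
# character. Callers are presumably expected to pass a list despite the
# `label: str` annotation. A quick demonstration:
from allennlp.data.fields import MultiLabelField

ok = MultiLabelField(["positive"])  # one label
oops = MultiLabelField("positive")  # accepted, but indexes per character
print(list(ok.labels), list(oops.labels))
# ['positive'] ['p', 'o', 's', 'i', 't', 'i', 'v', 'e']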
def text_to_instance(
    self, text: str, labels: List[Union[str, int]] = None
) -> Instance:  # type: ignore
    """
    # Parameters

    text : `str`, required.
        The text to classify
    labels : `List[Union[str, int]]`, optional, (default = `None`).
        The labels for this text.

    # Returns

    An `Instance` containing the following fields:
        - tokens (`TextField`) : The tokens in the sentence or phrase.
        - label (`MultiLabelField`) : The labels of the sentence or phrase.
    """
    fields: Dict[str, Field] = {}
    if self._segment_sentences:
        sentences: List[Field] = []
        sentence_splits = self._sentence_segmenter.split_sentences(text)
        for sentence in sentence_splits:
            word_tokens = self._tokenizer.tokenize(sentence)
            if self._max_sequence_length is not None:
                word_tokens = self._truncate(word_tokens)
            sentences.append(TextField(word_tokens, self._token_indexers))
        fields["tokens"] = ListField(sentences)
    else:
        tokens = self._tokenizer.tokenize(text)
        if self._max_sequence_length is not None:
            tokens = self._truncate(tokens)
        fields["tokens"] = TextField(tokens, self._token_indexers)
    if labels is not None:
        fields["labels"] = MultiLabelField(
            labels, skip_indexing=self._skip_label_indexing, num_labels=self._num_labels
        )
    return Instance(fields)
def text_to_instance(self,
                     tokens: List[Token],
                     entities: List = None,
                     relations: List = None) -> Instance:
    sequence = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {"tokens": sequence}
    words = [x.text for x in tokens]

    # Enumerate all candidate spans up to the maximum width.
    spans = []
    for start, end in enumerate_spans(words, max_span_width=self._max_span_width):
        assert start >= 0
        assert end >= 0
        spans.append(SpanField(start, end, sequence))
    span_field = ListField(spans)
    span_tuples = [(span.span_start, span.span_end) for span in spans]
    instance_fields["spans"] = span_field

    # Each span can carry several entity labels, hence one MultiLabelField per span.
    ner_labels = [[] for _ in span_tuples]
    ner_list = [((e.start, e.end), e.role) for e in entities]
    for span, label in ner_list:
        if self._too_long(span):
            continue
        ix = span_tuples.index(span)
        ner_labels[ix] += [label]
    instance_fields["ner_labels"] = ListField([
        MultiLabelField(entry, label_namespace=self.label_namespace)
        for entry in ner_labels
    ])

    metadata = {"words": words, "relations": relations}
    instance_fields["metadata"] = MetadataField(metadata)
    return Instance(instance_fields)
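# enumerate_spans (from allennlp.data.dataset_readers.dataset_utils) yields
# inclusive (start, end) index pairs for every candidate span, which is what
# span_tuples is matched against above. A tiny illustration:
from allennlp.data.dataset_readers.dataset_utils import enumerate_spans

words = ["Barack", "Obama", "visited", "Paris"]
print(enumerate_spans(words, max_span_width=2))
# [(0, 0), (0, 1), (1, 1), (1, 2), (2, 2), (2, 3), (3, 3)]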