def _encode_new_dataset(self, dataset, params: EncoderParams):
    """Encode ``dataset`` and return a new dataset holding the encoding.

    A fresh ``RepertoireDataset`` is created that reuses the input's
    ``repertoires``, ``params`` and ``metadata_file``; the input dataset
    object itself is not the one that receives the encoded data.

    Args:
        dataset: Dataset to encode. This method reads its ``repertoires``,
            ``params`` and ``metadata_file`` attributes and calls its
            ``get_data()`` method.
        params: Encoder parameters, passed through unchanged to
            ``self._encode_repertoires``.

    Returns:
        The newly created ``RepertoireDataset`` after ``add_encoded_data``
        has been called on it with the assembled ``EncodedData``.
    """
    result_dataset = RepertoireDataset(
        repertoires=dataset.repertoires,
        params=dataset.params,
        metadata_file=dataset.metadata_file,
    )

    # Encoding runs before the feature info is fetched, as in the original
    # statement order.
    examples, labels = self._encode_repertoires(dataset, params)
    annotations = self._get_feature_info()

    # Feature names are the entries under the "sequence_id" key of the
    # feature annotations.
    feature_names = list(annotations["sequence_id"])

    # One example id per item yielded by dataset.get_data().
    example_ids = [item.identifier for item in dataset.get_data()]

    encoded = EncodedData(
        examples=examples,
        labels=labels,
        feature_names=feature_names,
        feature_annotations=annotations,
        example_ids=example_ids,
        encoding=MatchedSequencesEncoder.__name__,
    )
    result_dataset.add_encoded_data(encoded)

    return result_dataset
def _encode_new_dataset(self, dataset, params: EncoderParams):
    """Encode ``dataset`` and return a new dataset holding the encoding.

    ``self._load_regex_df()`` is invoked first, before anything else is
    built. A fresh ``RepertoireDataset`` is then created that reuses the
    input's ``repertoires``, ``params`` and ``metadata_file``.

    Args:
        dataset: Dataset to encode. This method reads its ``repertoires``,
            ``params`` and ``metadata_file`` attributes and calls its
            ``get_metadata(["subject_id"])`` method.
        params: Encoder parameters, passed through unchanged to
            ``self._encode_repertoires``.

    Returns:
        The newly created ``RepertoireDataset`` after ``add_encoded_data``
        has been called on it with the assembled ``EncodedData``.
    """
    self._load_regex_df()

    result_dataset = RepertoireDataset(
        repertoires=dataset.repertoires,
        params=dataset.params,
        metadata_file=dataset.metadata_file,
    )

    # Feature info is fetched before the repertoires are encoded, as in the
    # original statement order.
    annotations = self._get_feature_info()
    examples, labels = self._encode_repertoires(dataset, params)

    # Only "subject_id" is requested, and the first value of the returned
    # mapping is used as the example ids.
    # NOTE(review): presumably this is the list of subject ids — confirm
    # against get_metadata().
    subject_metadata = dataset.get_metadata(["subject_id"])
    example_ids = list(subject_metadata.values())[0]

    # Feature names are the entries under the "chain_id" key of the
    # feature annotations.
    feature_names = list(annotations["chain_id"])

    encoded = EncodedData(
        examples=examples,
        example_ids=example_ids,
        feature_names=feature_names,
        feature_annotations=annotations,
        labels=labels,
        encoding=MatchedRegexEncoder.__name__,
    )
    result_dataset.add_encoded_data(encoded)

    return result_dataset