def make_embedding(emb_hparams, token_to_id_map):
    """Optionally loads embedding from file (if provided), and returns
    an instance of :class:`texar.tf.data.Embedding`.

    Args:
        emb_hparams: A dict of embedding hyperparameters. The ``"file"``
            entry holds the path to a pretrained-embedding file; ``None``
            or ``""`` means no pretrained embedding.
        token_to_id_map: Mapping from token strings to integer ids, used
            to index the loaded embedding table.

    Returns:
        An :class:`~texar.tf.data.Embedding` instance if a non-empty file
        path is given, otherwise ``None``.
    """
    # Truthiness covers both the None and the empty-string case in a
    # single idiomatic check (was: `is not None and len(...) > 0`).
    if emb_hparams["file"]:
        return Embedding(token_to_id_map, emb_hparams)
    return None
def make_embedding(src_emb_hparams, src_token_to_id_map,
                   tgt_emb_hparams=None, tgt_token_to_id_map=None,
                   emb_init_share=False):
    """Optionally loads source and target embeddings from files (if
    provided), and returns respective :class:`texar.tf.data.Embedding`
    instances.

    Args:
        src_emb_hparams: Embedding hyperparameters for the source side;
            the ``"file"`` entry is the pretrained-embedding path.
        src_token_to_id_map: Source-side token-to-id mapping.
        tgt_emb_hparams: Target-side embedding hyperparameters. Required
            when ``emb_init_share`` is False; ignored otherwise.
        tgt_token_to_id_map: Target-side token-to-id mapping. Required
            when ``emb_init_share`` is False; ignored otherwise.
        emb_init_share: If True, the target embedding is the very same
            object as the source embedding (shared initialization).

    Returns:
        A tuple ``(src_embedding, tgt_embedding)``; either element is
        ``None`` when no embedding file is configured for that side.
    """
    src_embedding = MonoTextData.make_embedding(
        src_emb_hparams, src_token_to_id_map)

    if emb_init_share:
        # Share the exact source Embedding instance (may be None).
        tgt_embedding = src_embedding
    else:
        tgt_embedding = None
        # Truthiness handles both None and "" file paths, consistent
        # with the other `make_embedding` helpers in this module.
        if tgt_emb_hparams["file"]:
            tgt_embedding = Embedding(tgt_token_to_id_map, tgt_emb_hparams)

    return src_embedding, tgt_embedding
def _default_mono_text_dataset_hparams():
    """Returns hyperparameters of a mono text dataset with default values.

    See :meth:`texar.tf.MonoTextData.default_hparams` for details.
    """
    # Build the defaults incrementally so related options stay grouped.
    # Insertion order matches the canonical hyperparameter listing.
    defaults = {}

    # Input files and vocabulary.
    defaults["files"] = []
    defaults["compression_type"] = None
    defaults["vocab_file"] = ""
    defaults["embedding_init"] = Embedding.default_hparams()

    # Tokenization and sequence-length handling.
    defaults["delimiter"] = " "
    defaults["max_seq_length"] = None
    defaults["length_filter_mode"] = "truncate"
    defaults["pad_to_max_seq_length"] = False

    # Special tokens and extra processing hooks.
    defaults["bos_token"] = SpecialTokens.BOS
    defaults["eos_token"] = SpecialTokens.EOS
    defaults["other_transformations"] = []

    # Multi-utterance (dialog-style) options.
    defaults["variable_utterance"] = False
    defaults["utterance_delimiter"] = "|||"
    defaults["max_utterance_cnt"] = 5

    # Naming and type-check control.
    defaults["data_name"] = None
    defaults["@no_typecheck"] = ["files"]

    return defaults
def make_embedding(hparams, vocabs):
    """Optionally loads embeddings from files (if provided), and returns
    respective :class:`texar.tf.data.Embedding` instances.

    Args:
        hparams: A dataset-hyperparameter dict, or a list/tuple of such
            dicts (one per sub-dataset).
        vocabs: Vocab objects aligned index-for-index with `hparams`;
            each must expose `token_to_id_map_py`.

    Returns:
        A list with one entry per dataset: an Embedding instance, or
        None for non-text datasets / datasets with no embedding file.
    """
    # Normalize the single-dataset case to a list so the loop below
    # handles both uniformly.
    if not isinstance(hparams, (list, tuple)):
        hparams = [hparams]

    embs = []
    for i, hparams_i in enumerate(hparams):
        # Non-text datasets never carry an embedding; keep a placeholder
        # so indices in `embs` stay aligned with `hparams`/`vocabs`.
        if not _is_text_data(hparams_i["data_type"]):
            embs.append(None)
            continue

        emb_shr = hparams_i["embedding_init_share_with"]
        if emb_shr is not None:
            # Sharing is only allowed with an EARLIER dataset, so that
            # embs[emb_shr] has already been built by this point.
            if emb_shr >= i:
                MultiAlignedData._raise_sharing_error(
                    i, emb_shr, "embedding_init_share_with")
            # NOTE(review): truthiness check — assumes a loaded Embedding
            # is always truthy (i.e. `None` is the only falsy entry).
            if not embs[emb_shr]:
                raise ValueError("Cannot share embedding with dataset %d "
                                 "which does not have an embedding."
                                 % emb_shr)
            # Embedding rows are indexed by vocab ids, so sharing the
            # embedding only makes sense when the vocab is shared too.
            if emb_shr != hparams_i["vocab_share_with"]:
                raise ValueError("'embedding_init_share_with' != "
                                 "vocab_share_with. embedding_init can "
                                 "be shared only when vocab is shared.")
            emb = embs[emb_shr]
        else:
            emb = None
            emb_file = hparams_i["embedding_init"]["file"]
            # Both None and "" mean "no pretrained embedding configured".
            if emb_file and emb_file != "":
                emb = Embedding(vocabs[i].token_to_id_map_py,
                                hparams_i["embedding_init"])

        embs.append(emb)

    return embs