def __init__(self,
             idiom_vector_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder) -> None:
    super().__init__(vocab)
    self.content_embedder = content_embedder

    # Load the pretrained idiom vectors: each line is an idiom token
    # followed by its embedding values.
    idiom_list, idiom_vectors = [], []
    with open(idiom_vector_path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])

    self.option_embedder = modules.Embedding(
        num_embeddings=len(idiom_list),
        embedding_dim=len(idiom_vectors[0]),
        projection_dim=self.content_embedder.get_output_dim(),
        # Initialise with the pretrained idiom vectors.
        weight=torch.tensor(idiom_vectors, dtype=torch.float, requires_grad=True))
    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
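# A minimal sketch of the idiom-vector file format every constructor in this
# file assumes: one idiom per line, the token first, then its embedding
# values, whitespace-separated. The sample lines and dimensions are
# hypothetical illustrations, not the project's actual data:
#
#     一马当先 0.12 -0.03 0.41 ...
#     画蛇添足 0.07 0.22 -0.18 ...
#
# A standalone helper mirroring the loading loop above:
from typing import List, Tuple


def read_idiom_vectors(path: str) -> Tuple[List[str], List[List[float]]]:
    """Parse `idiom v1 v2 ...` lines into a token list and a vector list."""
    idiom_list, idiom_vectors = [], []
    with open(path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])
    return idiom_list, idiom_vectors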
def __init__(self,
             idiom_vector_path: str,
             idiom_graph_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder,
             neighbor_num: int = 7,
             mode: List[str] = None) -> None:
    super().__init__(vocab)
    self.content_embedder = content_embedder

    idiom_list, idiom_vectors = [], []
    with open(idiom_vector_path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])

    self.graph_embedder = GraphEmbedder(idiom_graph_path,
                                        neighbor_num=neighbor_num,
                                        drop_neighbor=False)
    embedding_dim = self.content_embedder.get_output_dim()
    self.option_embedder = modules.Embedding(
        num_embeddings=len(idiom_list),
        embedding_dim=embedding_dim,
        # To initialise with the pretrained idiom vectors instead:
        # weight=torch.FloatTensor(idiom_vectors)
    )
    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)

    embedding_size = self.content_embedder.get_output_dim()
    self.neighbour_reasoner = StackedSelfAttentionEncoder(
        input_dim=embedding_size,
        hidden_dim=embedding_size,
        projection_dim=embedding_size,
        feedforward_hidden_dim=embedding_size,
        num_layers=1,
        num_attention_heads=2,
        use_positional_encoding=False)
    self.option_encoder = FirstVecEncoder(embedding_dim=embedding_size)
    self.option_reasoner = StackedSelfAttentionEncoder(
        input_dim=embedding_size,
        hidden_dim=embedding_size,
        projection_dim=embedding_size,
        feedforward_hidden_dim=embedding_size,
        num_layers=1,
        num_attention_heads=2,
        use_positional_encoding=False)

    if mode is None:
        mode = ['raw', 'ocn', 'nn']
    else:
        for item in mode:
            assert item in ['raw', 'ocn', 'nn'], f"{item} is invalid"
    self.mode = mode
    # One embedding_size-wide feature vector per enabled mode.
    self.data_merger = FeedForward(input_dim=embedding_size * len(mode),
                                   num_layers=1,
                                   hidden_dims=embedding_size,
                                   activations=Activation.by_name('linear')(),
                                   dropout=0.1)
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
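# A minimal sketch (not this model's actual forward pass) of how `mode` is
# meant to drive feature fusion: one embedding_size-wide vector per enabled
# mode is concatenated and projected back to embedding_size by `data_merger`.
# The readings of the mode names ('raw' option embedding, 'ocn'
# option-comparison features, 'nn' graph-neighbour features) and the tensor
# shapes below are assumptions.
import torch

embedding_size, batch, num_options = 4, 2, 10
mode = ['raw', 'ocn', 'nn']
features = {name: torch.randn(batch, num_options, embedding_size) for name in mode}
merged_input = torch.cat([features[name] for name in mode], dim=-1)
# Matches data_merger's input_dim = embedding_size * len(mode).
assert merged_input.shape[-1] == embedding_size * len(mode)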
def __init__(self,
             idiom_vector_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder,
             option_vector_encoder: Seq2VecEncoder,
             use_pretrained: bool = False,
             use_idiom_embedding: bool = True,
             use_idiom_text: bool = False,
             use_idiom_definition: bool = False,
             use_reasoner: bool = False,
             idiom_vector_size: int = None) -> None:
    super().__init__(vocab)
    if idiom_vector_size is not None and use_pretrained:
        raise ValueError(
            "When `use_pretrained` is True, `idiom_vector_size` must be None.")
    if not use_idiom_embedding and use_pretrained:
        raise ValueError("use_pretrained=True but use_idiom_embedding=False.")

    # Expected to be a BERT model.
    self.content_embedder = content_embedder
    self.option_vector_encoder = option_vector_encoder

    self.use_idiom_embedding = use_idiom_embedding
    self.use_pretrained = use_pretrained
    if self.use_idiom_embedding:
        idiom_list, idiom_vectors = [], []
        with open(idiom_vector_path) as fh:
            for line in fh:
                parts = line.strip().split()
                idiom_list.append(parts[0])
                idiom_vectors.append([float(x) for x in parts[1:]])
        if self.use_pretrained:
            self.option_embedder = modules.Embedding(
                num_embeddings=len(idiom_list),
                embedding_dim=len(idiom_vectors[0]),
                projection_dim=self.content_embedder.get_output_dim(),
                # Initialise with the pretrained idiom vectors.
                weight=torch.FloatTensor(idiom_vectors))
        else:
            embedding_dim = idiom_vector_size or len(idiom_vectors[0])
            self.option_embedder = modules.Embedding(
                num_embeddings=len(idiom_list),
                embedding_dim=embedding_dim,
                projection_dim=self.content_embedder.get_output_dim(),
                # To initialise with the pretrained idiom vectors instead:
                # weight=torch.FloatTensor(idiom_vectors)
            )

    # Each enabled option source widens the merger's input.
    if self.use_idiom_embedding:
        idiom_merger_in_features = self.option_embedder.get_output_dim()
    else:
        idiom_merger_in_features = 0
    self.use_idiom_text = use_idiom_text
    if self.use_idiom_text:
        idiom_merger_in_features += self.option_vector_encoder.get_output_dim()
    self.use_idiom_definition = use_idiom_definition
    if self.use_idiom_definition:
        idiom_merger_in_features += self.option_vector_encoder.get_output_dim()
    self.option_merger = nn.Linear(
        in_features=idiom_merger_in_features,
        out_features=self.content_embedder.get_output_dim(),
        bias=True)

    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)
    self.use_reasoner = use_reasoner
    if use_reasoner:
        embedding_size = self.content_embedder.get_output_dim()
        self.option_reasoner = StackedSelfAttentionEncoder(
            input_dim=embedding_size,
            hidden_dim=embedding_size,
            projection_dim=embedding_size,
            feedforward_hidden_dim=embedding_size,
            num_layers=1,
            num_attention_heads=2,
            use_positional_encoding=False)
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
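# A sketch of the width accounting behind `option_merger`: each enabled
# option source contributes one vector, and their concatenation is projected
# to the content embedder's output size. Note that the idiom embedding is
# already projected to that size via `projection_dim`, so its contribution
# equals the content dimension. The widths and tensor shapes here are
# hypothetical.
import torch
import torch.nn as nn

content_dim, enc_dim = 768, 768
in_features = content_dim + enc_dim + enc_dim  # embedding + idiom text + definition
option_merger = nn.Linear(in_features, content_dim, bias=True)
idiom_emb = torch.randn(2, 10, content_dim)   # projected option embedding
text_vec = torch.randn(2, 10, enc_dim)        # encoded idiom surface text
defn_vec = torch.randn(2, 10, enc_dim)        # encoded idiom definition
merged = option_merger(torch.cat([idiom_emb, text_vec, defn_vec], dim=-1))
assert merged.shape == (2, 10, content_dim)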
def __init__(self,
             idiom_vector_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder,
             use_pretrained: bool = False,
             use_reasoner: bool = False,
             idiom_vector_size: int = None,
             denoise_mode: str = 'soft',
             denoise_lambda: float = None,
             teacher_model_path: str = None,
             teacher_mode: str = None) -> None:
    super().__init__(vocab)
    self.content_embedder = content_embedder
    if idiom_vector_size is not None and use_pretrained:
        raise ValueError(
            "When `use_pretrained` is True, `idiom_vector_size` must be None.")

    if teacher_mode is not None:
        teacher_mode = teacher_mode.lower()
        assert teacher_mode in ('initialization', 'teacher'), (
            f'teacher_mode ({teacher_mode}) '
            'not in ("initialization", "teacher").')
    if teacher_mode is not None and teacher_model_path is None:
        raise ValueError(
            "Please set teacher_model_path when teacher_mode is not None.")
    self.teacher_mode = teacher_mode
    self.teacher_model_path = teacher_model_path
    self.teacher = self.load_teacher()

    idiom_list, idiom_vectors = [], []
    with open(idiom_vector_path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])

    self.use_pretrained = use_pretrained
    if self.use_pretrained:
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=len(idiom_vectors[0]),
            projection_dim=self.content_embedder.get_output_dim(),
            # Initialise with the pretrained idiom vectors.
            weight=torch.FloatTensor(idiom_vectors))
    else:
        embedding_dim = idiom_vector_size or len(idiom_vectors[0])
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=embedding_dim,
            projection_dim=self.content_embedder.get_output_dim(),
            # To initialise with the pretrained idiom vectors instead:
            # weight=torch.FloatTensor(idiom_vectors)
        )
    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)

    # This model builds the reasoner unconditionally; the flag is kept only
    # for config compatibility.
    if use_reasoner:
        logger.info(f"{type(self)} always uses the reasoner.")
    self.use_reasoner = True
    embedding_size = self.content_embedder.get_output_dim()
    self.option_reasoner = StackedSelfAttentionEncoder(
        input_dim=embedding_size,
        hidden_dim=embedding_size,
        projection_dim=embedding_size,
        feedforward_hidden_dim=embedding_size,
        num_layers=1,
        num_attention_heads=2,
        use_positional_encoding=False)

    if self.teacher_mode == 'initialization':
        # Share the teacher's parameters (same tensor objects) and keep
        # them trainable in the student.
        self.option_embedder.weight = self.teacher.option_embedder.weight
        self.option_embedder.weight.requires_grad = True
        self.scorer.weight = self.teacher.scorer.weight
        self.scorer.weight.requires_grad = True

    denoise_mode = denoise_mode.lower()
    assert denoise_mode in ('soft', 'hard', 'both', 'lambda'), (
        f'denoise_mode ({denoise_mode}) '
        'not in ("soft", "hard", "both", "lambda").')
    self.denoise_mode = denoise_mode
    self.denoise_lambda = denoise_lambda
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
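# `load_teacher` is called above but not defined in this snippet. A plausible
# sketch, assuming the teacher was saved as a standard AllenNLP archive at
# `teacher_model_path`; this is an illustration, not the project's actual
# implementation.
from allennlp.models.archival import load_archive


def load_teacher_sketch(teacher_model_path: str):
    if teacher_model_path is None:
        return None
    archive = load_archive(teacher_model_path)
    teacher = archive.model
    teacher.eval()
    # The teacher only supplies targets (or initial weights), so freeze it.
    for param in teacher.parameters():
        param.requires_grad = False
    return teacher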
def __init__(self,
             idiom_vector_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder,
             use_pretrained: bool = False,
             use_reasoner: bool = False,
             idiom_vector_size: int = None,
             reasoner_mode: str = None) -> None:
    super().__init__(vocab)
    self.content_embedder = content_embedder
    if idiom_vector_size is not None and use_pretrained:
        raise ValueError(
            "When `use_pretrained` is True, `idiom_vector_size` must be None.")

    idiom_list, idiom_vectors = [], []
    with open(idiom_vector_path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])

    self.use_pretrained = use_pretrained
    if self.use_pretrained:
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=len(idiom_vectors[0]),
            projection_dim=self.content_embedder.get_output_dim(),
            # Initialise with the pretrained idiom vectors.
            weight=torch.FloatTensor(idiom_vectors))
    else:
        embedding_dim = idiom_vector_size or len(idiom_vectors[0])
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=embedding_dim,
            projection_dim=self.content_embedder.get_output_dim(),
            # To initialise with the pretrained idiom vectors instead:
            # weight=torch.FloatTensor(idiom_vectors)
        )
    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)

    self.use_reasoner = use_reasoner
    if use_reasoner:
        embedding_size = self.content_embedder.get_output_dim()
        if reasoner_mode is None:
            reasoner_mode = 'self_attention'
        else:
            reasoner_mode = reasoner_mode.lower()
            assert reasoner_mode in ('self_attention', 'gated_self_attention')
        self.reasoner_mode = reasoner_mode
        if reasoner_mode == 'self_attention':
            self.option_reasoner = StackedSelfAttentionEncoder(
                input_dim=embedding_size,
                hidden_dim=embedding_size,
                projection_dim=embedding_size,
                feedforward_hidden_dim=embedding_size,
                num_layers=1,
                num_attention_heads=2,
                use_positional_encoding=False)
        elif reasoner_mode == 'gated_self_attention':
            self.option_reasoner = GatedSelfAttention(
                input_dim=embedding_size,
                hidden_dim=embedding_size,
                projection_dim=embedding_size,
                feedforward_hidden_dim=embedding_size,
                num_layers=1,
                num_attention_heads=2)
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
def __init__(self,
             idiom_vector_path: str,
             idiom_graph_path: str,
             dropout: float,
             vocab: Vocabulary,
             content_embedder: TextFieldEmbedder,
             use_pretrained: bool = False,
             idiom_vector_size: int = None,
             neighbor_num: int = 7,
             num_neighbour_attention_heads: int = 2) -> None:
    super().__init__(vocab)
    self.content_embedder = content_embedder
    if idiom_vector_size is not None and use_pretrained:
        raise ValueError(
            "When `use_pretrained` is True, `idiom_vector_size` must be None.")

    idiom_list, idiom_vectors = [], []
    with open(idiom_vector_path) as fh:
        for line in fh:
            parts = line.strip().split()
            idiom_list.append(parts[0])
            idiom_vectors.append([float(x) for x in parts[1:]])

    self.graph_embedder = GraphEmbedder(idiom_graph_path,
                                        neighbor_num=neighbor_num,
                                        drop_neighbor=False)
    self.use_pretrained = use_pretrained
    if self.use_pretrained:
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=len(idiom_vectors[0]),
            projection_dim=self.content_embedder.get_output_dim(),
            # Initialise with the pretrained idiom vectors.
            weight=torch.FloatTensor(idiom_vectors))
    else:
        embedding_dim = idiom_vector_size or len(idiom_vectors[0])
        self.option_embedder = modules.Embedding(
            num_embeddings=len(idiom_list),
            embedding_dim=embedding_dim,
            projection_dim=self.content_embedder.get_output_dim(),
            # To initialise with the pretrained idiom vectors instead:
            # weight=torch.FloatTensor(idiom_vectors)
        )
    self.dropout = nn.Dropout(dropout)
    self.scorer = nn.Linear(self.content_embedder.get_output_dim(), 1)

    embedding_size = self.content_embedder.get_output_dim()
    self.neighbour_reasoner = StackedSelfAttentionEncoder(
        input_dim=embedding_size,
        hidden_dim=embedding_size,
        projection_dim=embedding_size,
        feedforward_hidden_dim=embedding_size,
        num_layers=1,
        num_attention_heads=num_neighbour_attention_heads,
        use_positional_encoding=False)
    self.option_encoder = FirstVecEncoder(embedding_dim=embedding_size)
    self.option_reasoner = StackedSelfAttentionEncoder(
        input_dim=embedding_size,
        hidden_dim=embedding_size,
        projection_dim=embedding_size,
        feedforward_hidden_dim=embedding_size,
        num_layers=1,
        num_attention_heads=2,
        use_positional_encoding=False)
    # Three concatenated embedding_size-wide feature vectors feed the merger.
    self.data_merger = FeedForward(input_dim=embedding_size * 3,
                                   num_layers=1,
                                   hidden_dims=embedding_size,
                                   activations=Activation.by_name('linear')(),
                                   dropout=0.1)
    self.loss = nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
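# `FirstVecEncoder` is project-specific and not defined here. A plausible
# minimal version, assuming it is a Seq2VecEncoder that keeps the first
# vector of each sequence (e.g. the option itself ahead of its graph
# neighbours); treat this as a guess at the interface, not the project's
# actual code.
import torch
from allennlp.modules import Seq2VecEncoder


class FirstVecEncoderSketch(Seq2VecEncoder):
    def __init__(self, embedding_dim: int) -> None:
        super().__init__()
        self._embedding_dim = embedding_dim

    def get_input_dim(self) -> int:
        return self._embedding_dim

    def get_output_dim(self) -> int:
        return self._embedding_dim

    def forward(self, tokens: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        # (batch, seq_len, dim) -> (batch, dim): take the first position.
        return tokens[:, 0, :]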