def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             use_product_feature=None,
             use_difference_feature=None,
             initial_embeddings=None,
             fine_tune_loaded_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             composition_ln=None,
             context_args=None,
             trainable_temperature=None,
             **kwargs):
    super(EESC, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.model_dim = model_dim
    self.trainable_temperature = trainable_temperature

    self.classifier_dropout_rate = 1. - classifier_keep_rate
    self.embedding_dropout_rate = 1. - embedding_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(
        word_embedding_dim,
        vocab.size,
        vectors=vocab.vectors,
        fine_tune=fine_tune_loaded_embeddings)

    self.binary_tree_lstm = BinaryTreeLSTM(
        word_embedding_dim,
        model_dim // 2,
        False,
        composition_ln=composition_ln,
        trainable_temperature=trainable_temperature)

    mlp_input_dim = self.get_features_dim()

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, self.classifier_dropout_rate)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

    # For sample printing and logging
    self.mask_memory = None
    self.inverted_vocabulary = None
    self.temperature_to_display = 0.0

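# Several constructors in this file size their classifier MLPs with
# self.get_features_dim(), which is defined on the model classes elsewhere in
# the codebase. Below is a minimal standalone sketch of the quantity it
# computes, mirroring the inline feature-dimension arithmetic in the attention
# SentencePairModel constructor further down; the function name and defaults
# here are illustrative only, not the actual method.
def sketch_features_dim(model_dim, use_sentence_pair=True,
                        use_difference_feature=True, use_product_feature=True):
    hidden_dim = model_dim // 2
    # [premise; hypothesis] for pair tasks, a single sentence vector otherwise.
    features_dim = model_dim if use_sentence_pair else hidden_dim
    # Optional elementwise difference and product features, one hidden_dim each.
    if use_difference_feature:
        features_dim += hidden_dim
    if use_product_feature:
        features_dim += hidden_dim
    return features_dim
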
def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             use_product_feature=None,
             use_difference_feature=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             context_args=None,
             bidirectional=None,
             **kwargs):
    super(RNNModel, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature
    self.bidirectional = bidirectional
    self.input_dim = context_args.input_dim

    self.model_dim = model_dim
    classifier_dropout_rate = 1. - classifier_keep_rate
    self.embedding_dropout_rate = 1. - embedding_keep_rate

    args = Args()
    args.size = model_dim

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    # Integer division so nn.LSTM gets an int hidden size; a bidirectional
    # encoder splits model_dim across the two directions.
    self.rnn = nn.LSTM(
        self.input_dim,
        self.model_dim // 2 if self.bidirectional else self.model_dim,
        num_layers=1,
        bidirectional=self.bidirectional,
        batch_first=True)

    mlp_input_dim = self.get_features_dim()

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, classifier_dropout_rate)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

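# Why the hidden size above is model_dim // 2 when bidirectional: nn.LSTM
# concatenates the forward and backward hidden states, so the per-timestep
# output returns to model_dim. A small self-contained check with illustrative
# values; this is not part of the model code.
import torch
import torch.nn as nn

def check_bilstm_output_dim(model_dim=64, input_dim=32):
    rnn = nn.LSTM(input_dim, model_dim // 2, num_layers=1,
                  bidirectional=True, batch_first=True)
    x = torch.randn(4, 10, input_dim)       # (batch, time, input_dim)
    output, _ = rnn(x)
    assert output.size(-1) == model_dim     # 2 directions * (model_dim // 2)
    return output.shape
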
def __init__(self,
             mlp_input_dim,
             mlp_dim,
             num_classes,
             num_mlp_layers,
             mlp_ln,
             classifier_keep_rate=0.0):
    super(Discriminator, self).__init__()

    self.classifier_dropout_rate = 1. - classifier_keep_rate
    self.discriminator = MLP(mlp_input_dim, mlp_dim, num_classes,
                             num_mlp_layers, mlp_ln, self.classifier_dropout_rate)

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             context_args=None,
             gated=None,
             selection_keep_rate=None,
             pyramid_selection_keep_rate=None,
             **kwargs):
    super(Pyramid, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.model_dim = model_dim
    self.gated = gated
    self.selection_keep_rate = selection_keep_rate

    classifier_dropout_rate = 1. - classifier_keep_rate

    args = Args()
    args.size = model_dim
    args.input_dropout_rate = 1. - embedding_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    # Integer division so the tree LSTM gets an int dimension.
    self.composition_fn = SimpleTreeLSTM(model_dim // 2, composition_ln=False)
    self.selection_fn = Linear(initializer=HeKaimingInitializer)(model_dim, 1)

    # TODO: Set up layer norm.

    mlp_input_dim = model_dim * 2 if use_sentence_pair else model_dim

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, classifier_dropout_rate)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             fine_tune_loaded_embeddings=None,
             use_difference_feature=None,
             use_product_feature=None,
             num_classes=None,
             embedding_keep_rate=None,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             use_sentence_pair=False,
             context_args=None,
             **kwargs):
    super(BaseModel, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.model_dim = model_dim
    classifier_dropout_rate = 1. - classifier_keep_rate

    args = Args()
    args.size = model_dim

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(
        word_embedding_dim,
        vocab.size,
        vectors=vocab.vectors,
        fine_tune=fine_tune_loaded_embeddings)

    mlp_input_dim = self.get_features_dim()

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, classifier_dropout_rate)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_bn=None,
             context_args=None,
             **kwargs):
    super(BaseModel, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.model_dim = model_dim

    classifier_dropout_rate = 1. - classifier_keep_rate

    args = Args()
    args.size = model_dim
    args.input_dropout_rate = 1. - embedding_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    self.rnn = nn.LSTM(args.size, model_dim, num_layers=1, batch_first=True)

    mlp_input_dim = model_dim * 2 if use_sentence_pair else model_dim

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_bn, classifier_dropout_rate)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

def __init__(self,
             rl_mu=None,
             rl_baseline=None,
             rl_reward=None,
             rl_weight=None,
             rl_whiten=None,
             rl_valid=None,
             rl_epsilon=None,
             rl_catalan=None,
             rl_catalan_backprop=None,
             rl_transition_acc_as_reward=None,
             rl_value_size=None,
             rl_value_lstm=None,
             **kwargs):
    super(BaseModel, self).__init__(**kwargs)

    self.kwargs = kwargs

    self.rl_mu = rl_mu
    self.rl_baseline = rl_baseline
    self.rl_reward = rl_reward
    self.rl_weight = rl_weight
    self.rl_whiten = rl_whiten
    self.rl_valid = rl_valid
    self.rl_value_size = rl_value_size
    self.rl_value_lstm = rl_value_lstm
    self.spinn.catalan = rl_catalan
    self.spinn.catalan_backprop = rl_catalan_backprop
    self.rl_transition_acc_as_reward = rl_transition_acc_as_reward

    if self.rl_baseline == "value":
        num_outputs = 2 if self.use_sentence_pair else 1
        self.v_dim = self.rl_value_lstm
        self.v_rnn_dim = self.v_dim
        self.v_mlp_dim = self.v_dim * num_outputs
        self.v_rnn = nn.LSTM(self.input_dim, self.v_rnn_dim,
                             num_layers=1, batch_first=True)
        self.v_mlp = MLP(self.v_mlp_dim,
                         mlp_dim=self.rl_value_size,
                         num_classes=1,
                         num_mlp_layers=2,
                         mlp_ln=True,
                         classifier_dropout_rate=0.1)

    self.register_buffer('baseline', torch.FloatTensor([0.0]))

def __init__(self,
             rl_mu=None,
             rl_baseline=None,
             rl_reward=None,
             rl_weight=None,
             rl_whiten=None,
             rl_valid=None,
             rl_epsilon=None,
             rl_entropy=None,
             rl_entropy_beta=None,
             rl_catalan=None,
             rl_transition_acc_as_reward=None,
             **kwargs):
    super(BaseModel, self).__init__(**kwargs)

    self.kwargs = kwargs

    self.rl_mu = rl_mu
    self.rl_baseline = rl_baseline
    self.rl_reward = rl_reward
    self.rl_weight = rl_weight
    self.rl_whiten = rl_whiten
    self.rl_valid = rl_valid
    self.rl_entropy = rl_entropy
    self.rl_entropy_beta = rl_entropy_beta
    self.spinn.epsilon = rl_epsilon
    self.spinn.catalan = rl_catalan
    self.rl_transition_acc_as_reward = rl_transition_acc_as_reward

    if self.rl_baseline == "value":
        self.v_dim = 100
        self.v_rnn = nn.LSTM(self.input_dim, self.v_dim,
                             num_layers=1, batch_first=True)
        self.v_mlp = MLP(self.v_dim,
                         mlp_dim=1024,
                         num_classes=1,
                         num_mlp_layers=2,
                         mlp_ln=True,
                         classifier_dropout_rate=0.1)

    self.register_buffer('baseline', torch.FloatTensor([0.0]))

def __init__(self,
             rl_mu=None,
             rl_baseline=None,
             rl_reward=None,
             rl_value_reward=None,
             rl_weight=None,
             rl_whiten=None,
             rl_valid=None,
             rl_epsilon=None,
             rl_catalan=None,
             rl_catalan_backprop=None,
             rl_transition_acc_as_reward=None,
             rl_value_size=None,
             rl_value_lstm=None,
             rl_detach=None,
             data_type=None,
             **kwargs):
    super(BaseModel, self).__init__(data_type=data_type, **kwargs)
    # ^ To-do: The data_type addition doesn't seem kosher, make change. --Nikita

    self.kwargs = kwargs

    self.rl_mu = rl_mu
    self.rl_baseline = rl_baseline
    self.rl_reward = rl_reward
    self.rl_value_reward = rl_value_reward
    self.rl_weight = rl_weight
    self.rl_whiten = rl_whiten
    self.rl_valid = rl_valid
    self.rl_value_size = rl_value_size
    self.rl_value_lstm = rl_value_lstm
    self.spinn.catalan = rl_catalan
    self.spinn.catalan_backprop = rl_catalan_backprop
    self.rl_transition_acc_as_reward = rl_transition_acc_as_reward
    self.rl_detach = rl_detach
    self.data_type = data_type

    if self.rl_baseline == "value":
        num_outputs = 2 if self.use_sentence_pair else 1
        self.v_dim = self.rl_value_lstm
        self.v_rnn_dim = self.v_dim
        self.v_mlp_dim = self.v_dim * num_outputs
        if data_type == "mt":
            self.v_rnn = nn.LSTM(self.input_dim // 2, self.v_rnn_dim,
                                 num_layers=1, batch_first=True)
        else:
            self.v_rnn = nn.LSTM(self.input_dim, self.v_rnn_dim,
                                 num_layers=1, batch_first=True)
        self.v_mlp = MLP(self.v_mlp_dim,
                         mlp_dim=self.rl_value_size,
                         num_classes=1,
                         num_mlp_layers=2,
                         mlp_ln=True,
                         classifier_dropout_rate=0.1)
    elif self.rl_baseline == "shared":
        self.v_mlp = MLP(self.input_dim // 2,
                         mlp_dim=self.rl_value_size,
                         num_classes=1,
                         num_mlp_layers=2,
                         mlp_ln=True,
                         classifier_dropout_rate=0.1)
    elif self.rl_baseline == "lbtree":
        num_outputs = 2 if self.use_sentence_pair else 1
        # To-do: make new flag to replace rl_value_size
        self.lb_mlp = MLP(self.model_dim // 2,
                          mlp_dim=self.rl_value_size,
                          num_classes=1,
                          num_mlp_layers=2,
                          mlp_ln=True,
                          classifier_dropout_rate=0.1)

    self.register_buffer('baseline', torch.FloatTensor([0.0]))

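# The RL constructors above build a value network (v_rnn + v_mlp) that is only
# used when rl_baseline == "value". Below is a minimal sketch of how such a
# learned baseline typically enters a REINFORCE-style transition loss: the
# predicted value is regressed toward the observed reward, and the advantage
# (reward minus baseline) scales the log-probability of the sampled
# transitions. This is a generic illustration, not the exact loss used here.
import torch
import torch.nn.functional as F

def sketch_value_baseline_loss(log_probs, rewards, predicted_values, rl_weight=1.0):
    # log_probs:        (batch,) summed log-probs of the sampled transitions
    # rewards:          (batch,) episode rewards (e.g. classification accuracy)
    # predicted_values: (batch,) output of the value head (v_mlp)
    advantage = rewards - predicted_values.detach()
    policy_loss = -(advantage * log_probs).mean()
    value_loss = F.mse_loss(predicted_values, rewards)
    return rl_weight * policy_loss + value_loss
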
def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             fine_tune_loaded_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             classifier_keep_rate=None,
             context_args=None,
             composition_args=None,
             data_type=None,
             **kwargs):
    super(BaseModel, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.hidden_dim = hidden_dim = model_dim
    self.wrap_items = composition_args.wrap_items
    self.extract_h = composition_args.extract_h

    self.initial_embeddings = initial_embeddings
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim
    self.data_type = data_type

    classifier_dropout_rate = 1. - classifier_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    # Build parsing component.
    self.lms = self.build_lms(composition_args, vocab)

    # Build classifier.
    if self.data_type != "mt":
        features_dim = self.get_features_dim()
        self.mlp = MLP(features_dim, mlp_dim, num_classes,
                       num_mlp_layers, mlp_ln, classifier_dropout_rate)

    self.embedding_dropout_rate = 1. - embedding_keep_rate

    # Create dynamic embedding layer.
    self.embed = Embed(
        word_embedding_dim,
        vocab.size,
        vectors=vocab.vectors,
        fine_tune=fine_tune_loaded_embeddings)

    self.input_dim = context_args.input_dim
    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

    self.inverted_vocabulary = None

    # Create Lift layer.
    self.lift = Lift(context_args.input_dim, model_dim * model_dim)

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             tracking_lstm_hidden_dim=4,
             transition_weight=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             lateral_tracking=None,
             tracking_ln=None,
             use_tracking_in_composition=None,
             predict_use_cell=None,
             use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             classifier_keep_rate=None,
             context_args=None,
             composition_args=None,
             detach=None,
             evolution=None,
             **kwargs):
    super(SpinnBaseModel, self).__init__()

    assert not (use_tracking_in_composition and not lateral_tracking), \
        "Lateral tracking must be on to use tracking in composition."

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.hidden_dim = composition_args.size
    self.wrap_items = composition_args.wrap_items
    self.extract_h = composition_args.extract_h

    self.initial_embeddings = initial_embeddings
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim

    classifier_dropout_rate = 1. - classifier_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    # Build parsing component.
    self.spinn = self.build_rspinn(composition_args, vocab, predict_use_cell)

    # Build classifier.
    features_dim = self.get_features_dim()  # Same as SPINN.
    self.mlp = MLP(features_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, classifier_dropout_rate)

    self.embedding_dropout_rate = 1. - embedding_keep_rate

    # Create dynamic embedding layer.
    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    self.input_dim = context_args.input_dim
    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

    self.inverted_vocabulary = None

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             use_product_feature=None,
             use_difference_feature=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             composition_ln=None,
             context_args=None,
             trainable_temperature=None,
             enforce_right=None,
             parent_selection=None,
             composition_args=None,
             predict_use_cell=None,
             low_dim=None,
             topk=None,
             cp_num=None,
             multiproc=None,
             **kwargs):
    super(CatalanPyramid, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.model_dim = model_dim
    self.low_dim = low_dim
    self.topk = topk
    self.cp_num = cp_num
    self.multiproc = multiproc
    self.trainable_temperature = trainable_temperature
    self.parent_selection = parent_selection
    self.enforce_right = enforce_right

    self.classifier_dropout_rate = 1. - classifier_keep_rate
    self.embedding_dropout_rate = 1. - embedding_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    self.chart_parser = ChartParser(
        word_embedding_dim,
        model_dim // 2,
        low_dim,
        multiproc,
        composition_ln=composition_ln,
        trainable_temperature=trainable_temperature,
        parent_selection=parent_selection,
        use_sentence_pair=use_sentence_pair)

    # assert FLAGS.lateral_tracking == False  # TODO: move assertion flag to base.
    self.spinn = self.build_spinn(composition_args, vocab, predict_use_cell)

    mlp_input_dim = self.get_features_dim()

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, self.classifier_dropout_rate)

    # SPINN vars
    self.encode = context_args.encoder
    # self.encode = Linear()(word_embedding_dim, model_dim)
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context
    self.input_dim = context_args.input_dim
    self.wrap_items = composition_args.wrap_items
    self.extract_h = composition_args.extract_h

    # For sample printing and logging
    self.parse_memory = None
    self.inverted_vocabulary = None
    self.temperature_to_display = 0.0

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             num_classes=None,
             mlp_dim=None,
             embedding_keep_rate=None,
             classifier_keep_rate=None,
             tracking_lstm_hidden_dim=4,
             transition_weight=None,
             encode_style=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             use_skips=False,
             lateral_tracking=None,
             use_tracking_in_composition=None,
             use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             num_mlp_layers=None,
             mlp_bn=None,
             use_projection=None,
             **kwargs):
    super(BaseModel, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    # Integer division so downstream layers get int dimensions.
    self.hidden_dim = hidden_dim = model_dim // 2

    args = Args()
    args.lateral_tracking = lateral_tracking
    args.use_tracking_in_composition = use_tracking_in_composition
    args.size = model_dim // 2
    args.tracker_size = tracking_lstm_hidden_dim
    args.transition_weight = transition_weight

    self.initial_embeddings = initial_embeddings
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim

    classifier_dropout_rate = 1. - classifier_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    # Build parsing component.
    self.spinn = self.build_spinn(args, vocab, use_skips)

    # Build classifier.
    features_dim = self.get_features_dim()
    self.mlp = MLP(features_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_bn, classifier_dropout_rate)

    # The input embeddings represent the hidden and cell state, so multiply by 2.
    self.embedding_dropout_rate = 1. - embedding_keep_rate
    input_embedding_dim = args.size * 2

    # Projection will effectively be done by the encoding network.
    use_projection = True if encode_style is None else False

    # Create dynamic embedding layer.
    self.embed = Embed(input_embedding_dim, vocab.size,
                       vectors=vocab.vectors,
                       use_projection=use_projection)

    # Optionally build input encoder.
    if encode_style is not None:
        self.encode = self.build_input_encoder(
            encode_style=encode_style,
            word_embedding_dim=word_embedding_dim,
            model_dim=model_dim,
            num_layers=encode_num_layers,
            bidirectional=encode_bidirectional,
            reverse=encode_reverse,
            dropout=self.embedding_dropout_rate)

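# The comment above notes that the input embeddings double as the hidden and
# cell state of stack entries, hence input_embedding_dim = args.size * 2. A
# minimal sketch of that split, assuming the hidden half comes first; the
# actual ordering is determined elsewhere in the codebase.
import torch

def sketch_split_embedding(embedding, size):
    # embedding: (batch, 2 * size) -> h: (batch, size), c: (batch, size)
    h, c = embedding[:, :size], embedding[:, size:]
    return h, c

# Example: sketch_split_embedding(torch.randn(8, 2 * 150), 150)
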
def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             fine_tune_loaded_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             tracking_lstm_hidden_dim=4,
             transition_weight=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             lateral_tracking=None,
             tracking_ln=None,
             use_tracking_in_composition=None,
             predict_use_cell=None,
             use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             classifier_keep_rate=None,
             context_args=None,
             composition_args=None,
             with_attention=False,
             data_type=None,
             target_vocabulary=None,
             onmt_module=None,
             **kwargs):
    super(BaseModel, self).__init__()

    assert not (use_tracking_in_composition and not lateral_tracking), \
        "Lateral tracking must be on to use tracking in composition."

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.hidden_dim = composition_args.size
    self.wrap_items = composition_args.wrap_items
    self.extract_h = composition_args.extract_h

    if data_type == "mt":
        self.post_projection = Linear()(
            context_args.input_dim, context_args.input_dim // 2, bias=True)

    self.initial_embeddings = initial_embeddings
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim
    self.data_type = data_type

    classifier_dropout_rate = 1. - classifier_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    # Build parsing component.
    self.spinn = self.build_spinn(composition_args, vocab, predict_use_cell)

    # Build classifier.
    features_dim = self.get_features_dim()
    if data_type != "mt":
        self.mlp = MLP(features_dim, mlp_dim, num_classes,
                       num_mlp_layers, mlp_ln, classifier_dropout_rate)
    # self.generator = nn.Sequential(
    #     nn.Linear(self.model_dim, len(self.target_vocabulary)), nn.LogSoftmax())

    self.embedding_dropout_rate = 1. - embedding_keep_rate

    # Create dynamic embedding layer.
    self.embed = Embed(
        word_embedding_dim,
        vocab.size,
        vectors=vocab.vectors,
        fine_tune=fine_tune_loaded_embeddings)

    self.input_dim = context_args.input_dim
    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

    self.inverted_vocabulary = None

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             num_classes=None,
             mlp_dim=None,
             embedding_keep_rate=None,
             classifier_keep_rate=None,
             tracking_lstm_hidden_dim=4,
             transition_weight=None,
             use_encode=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             use_skips=False,
             lateral_tracking=None,
             use_tracking_in_composition=None,
             # use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             num_mlp_layers=None,
             mlp_bn=None,
             model_specific_params={},
             **kwargs):
    super(SentencePairModel, self).__init__()
    logger.info('ATTSPINN SentencePairModel init...')

    # self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    # Integer division so downstream layers get int dimensions.
    self.hidden_dim = hidden_dim = model_dim // 2

    # features_dim = hidden_dim * 2 if use_sentence_pair else hidden_dim
    features_dim = model_dim

    # [premise, hypothesis, diff, product]
    if self.use_difference_feature:
        features_dim += self.hidden_dim
    if self.use_product_feature:
        features_dim += self.hidden_dim

    mlp_input_dim = features_dim

    self.initial_embeddings = initial_embeddings
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim

    classifier_dropout_rate = 1. - classifier_keep_rate

    args = Args()
    args.lateral_tracking = lateral_tracking
    args.use_tracking_in_composition = use_tracking_in_composition
    args.size = model_dim // 2
    args.tracker_size = tracking_lstm_hidden_dim
    args.transition_weight = transition_weight
    args.using_diff_in_mlstm = model_specific_params['using_diff_in_mlstm']
    args.using_prod_in_mlstm = model_specific_params['using_prod_in_mlstm']
    args.using_null_in_attention = model_specific_params['using_null_in_attention']

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    # The input embeddings represent the hidden and cell state, so multiply by 2.
    self.embedding_dropout_rate = 1. - embedding_keep_rate
    input_embedding_dim = args.size * 2

    # Create dynamic embedding layer.
    self.embed = Embed(input_embedding_dim, vocab.size, vectors=vocab.vectors)

    self.use_encode = use_encode
    if use_encode:
        self.encode_reverse = encode_reverse
        self.encode_bidirectional = encode_bidirectional
        self.bi = 2 if self.encode_bidirectional else 1
        self.encode_num_layers = encode_num_layers
        self.encode = nn.LSTM(model_dim, model_dim // self.bi,
                              num_layers=encode_num_layers,
                              batch_first=True,
                              bidirectional=self.encode_bidirectional,
                              dropout=self.embedding_dropout_rate)

    self.spinn = self.build_spinn(args, vocab, use_skips)

    self.attention = self.build_attention(args)

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_bn, classifier_dropout_rate)

def __init__(self,
             model_dim=None,
             word_embedding_dim=None,
             vocab_size=None,
             use_product_feature=None,
             use_difference_feature=None,
             initial_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             use_sentence_pair=False,
             classifier_keep_rate=None,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             composition_ln=None,
             context_args=None,
             trainable_temperature=None,
             test_temperature_multiplier=None,
             selection_dim=None,
             gumbel=None,
             **kwargs):
    super(Pyramid, self).__init__()

    self.use_sentence_pair = use_sentence_pair
    self.use_difference_feature = use_difference_feature
    self.use_product_feature = use_product_feature

    self.model_dim = model_dim
    self.test_temperature_multiplier = test_temperature_multiplier
    self.trainable_temperature = trainable_temperature
    self.gumbel = gumbel
    self.selection_dim = selection_dim

    self.classifier_dropout_rate = 1. - classifier_keep_rate
    self.embedding_dropout_rate = 1. - embedding_keep_rate

    vocab = Vocab()
    vocab.size = initial_embeddings.shape[0] if initial_embeddings is not None else vocab_size
    vocab.vectors = initial_embeddings

    self.embed = Embed(word_embedding_dim, vocab.size, vectors=vocab.vectors)

    # Integer division so the tree LSTM gets an int dimension.
    self.composition_fn = SimpleTreeLSTM(model_dim // 2,
                                         composition_ln=composition_ln)

    self.selection_fn_1 = Linear(initializer=HeKaimingInitializer)(
        model_dim, selection_dim)
    self.selection_fn_2 = Linear(initializer=HeKaimingInitializer)(
        selection_dim, 1)

    def selection_fn(selection_input):
        selection_hidden = F.tanh(self.selection_fn_1(selection_input))
        return self.selection_fn_2(selection_hidden)
    self.selection_fn = selection_fn

    mlp_input_dim = self.get_features_dim()

    self.mlp = MLP(mlp_input_dim, mlp_dim, num_classes,
                   num_mlp_layers, mlp_ln, self.classifier_dropout_rate)

    if self.trainable_temperature:
        self.temperature = nn.Parameter(torch.ones(1, 1), requires_grad=True)

    self.encode = context_args.encoder
    self.reshape_input = context_args.reshape_input
    self.reshape_context = context_args.reshape_context

    # For sample printing and logging
    self.merge_sequence_memory = None
    self.inverted_vocabulary = None
    self.temperature_to_display = 0.0

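# The constructor above wires a two-layer selection_fn, a `gumbel` flag, and an
# optional trainable temperature, all consumed by the forward pass elsewhere.
# A minimal sketch of how per-position selection scores might be turned into
# merge weights with a temperature-scaled (optionally straight-through Gumbel)
# softmax; the model's actual selection logic may differ.
import torch
import torch.nn.functional as F

def sketch_select_merge_position(selection_scores, temperature=1.0, gumbel=False):
    # selection_scores: (batch, num_candidate_positions)
    logits = selection_scores / temperature
    if gumbel:
        # Straight-through Gumbel-softmax: one-hot forward, soft backward.
        return F.gumbel_softmax(logits, tau=1.0, hard=True)
    return F.softmax(logits, dim=-1)
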