def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None, hidden_dim=None, dropout=None, weightnoise_std=None, param_init=None, bias_init=None): register_handler(self) # self.num_layers = layers input_dim = input_dim or exp_global.default_layer_dim hidden_dim = hidden_dim or exp_global.default_layer_dim self.hidden_dim = hidden_dim # self.dropout_rate = dropout or exp_global.dropout # self.weightnoise_std = weightnoise_std or exp_global.weight_noise assert hidden_dim % 2 == 0 param_init = param_init or exp_global.param_init bias_init = bias_init or exp_global.bias_init model = exp_global.dynet_param_collection.param_col self.p_Wl = model.add_parameters(dim=(hidden_dim * 5, hidden_dim), init=param_init.initializer( (hidden_dim * 5, hidden_dim))) self.p_Wr = model.add_parameters(dim=(hidden_dim * 5, hidden_dim), init=param_init.initializer( (hidden_dim * 5, hidden_dim))) self.p_b = model.add_parameters(dim=(hidden_dim * 5, ), init=bias_init.initializer( (hidden_dim * 5, )))
def __init__(self, src_reader, trg_reader, src_embedder=bare(SimpleWordEmbedder),
             encoder=bare(BiLSTMSeqTransducer), attender=bare(MlpAttender),
             trg_embedder=bare(SimpleWordEmbedder), decoder=bare(MlpSoftmaxDecoder),
             inference=bare(SimpleInference), calc_global_fertility=False,
             calc_attention_entropy=False):
  '''Constructor.

  :param src_reader: A reader for the source side.
  :param trg_reader: A reader for the target side.
  :param src_embedder: A word embedder for the input language
  :param encoder: An encoder to generate encoded inputs
  :param attender: An attention module
  :param trg_embedder: A word embedder for the output language
  :param decoder: A decoder
  :param inference: The default inference strategy used for this model
  '''
  register_handler(self)
  self.src_reader = src_reader
  self.trg_reader = trg_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.attender = attender
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.calc_global_fertility = calc_global_fertility
  self.calc_attention_entropy = calc_attention_entropy
  self.inference = inference
def __init__(self, exp_global=Ref(Path("exp_global")), input_dim=None, hidden_dim=None, dropout=None, weightnoise_std=None, param_init=None, bias_init=None): register_handler(self) model = exp_global.dynet_param_collection.param_col input_dim = input_dim or exp_global.default_layer_dim hidden_dim = hidden_dim or exp_global.default_layer_dim self.hidden_dim = hidden_dim self.dropout_rate = dropout or exp_global.dropout self.weightnoise_std = weightnoise_std or exp_global.weight_noise self.input_dim = input_dim param_init = param_init or exp_global.param_init bias_init = bias_init or exp_global.bias_init # [i; f; o; g] self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim), init=param_init.initializer( (hidden_dim * 4, input_dim), num_shared=4)) self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim), init=param_init.initializer( (hidden_dim * 4, hidden_dim), num_shared=4)) self.p_b = model.add_parameters(dim=(hidden_dim * 4, ), init=bias_init.initializer( (hidden_dim * 4, ), num_shared=4)) self.dropout_mask_x = None self.dropout_mask_h = None
def __init__(self, yaml_context, layers=1, input_dim=None, hidden_dim=None, dropout=None,
             weightnoise_std=None):
  register_handler(self)
  self.num_layers = layers
  input_dim = input_dim or yaml_context.default_layer_dim
  hidden_dim = hidden_dim or yaml_context.default_layer_dim
  self.hidden_dim = hidden_dim
  self.dropout_rate = dropout or yaml_context.dropout
  self.weightnoise_std = weightnoise_std or yaml_context.weight_noise
  assert hidden_dim % 2 == 0
  # integer division keeps the per-direction dims integral for DyNet
  self.forward_layers = [UniLSTMSeqTransducer(yaml_context, input_dim, hidden_dim // 2,
                                              dropout, weightnoise_std)]
  self.backward_layers = [UniLSTMSeqTransducer(yaml_context, input_dim, hidden_dim // 2,
                                               dropout, weightnoise_std)]
  self.forward_layers += [UniLSTMSeqTransducer(yaml_context, hidden_dim, hidden_dim // 2,
                                               dropout, weightnoise_std)
                          for _ in range(layers - 1)]
  self.backward_layers += [UniLSTMSeqTransducer(yaml_context, hidden_dim, hidden_dim // 2,
                                                dropout, weightnoise_std)
                           for _ in range(layers - 1)]
def __init__(self, training_regimen, eval_every, name=None):
  register_handler(self)
  self.training_regimen = training_regimen
  self.eval_train_every = 1000
  self.eval_dev_every = eval_every
  self.epoch_num = 0
  self.epoch_loss = xnmt.loss.LossBuilder()
  self.epoch_words = 0
  self.sent_num = 0
  self.sent_num_not_report_train = 0
  self.sent_num_not_report_dev = 0
  self.fractional_epoch = 0
  self.dev_score = None
  self.best_dev_score = None
  self.dev_words = 0
  self.last_report_words = 0
  self.start_time = time.time()
  self.last_report_train_time = self.start_time
  self.dev_start_time = self.start_time
  self.name = name
def __init__(self, exp_global=Ref(Path("exp_global")), emb_dim=None, weight_noise=None, word_dropout=0.0, fix_norm=None, init=None, vocab_size=None, vocab=None, yaml_path=None, src_reader=Ref(path=Path("model.src_reader"), required=False), trg_reader=Ref(path=Path("model.trg_reader"), required=False), param_init=None): register_handler(self) self.emb_dim = emb_dim or exp_global.default_layer_dim self.weight_noise = weight_noise or exp_global.weight_noise self.word_dropout = word_dropout self.fix_norm = fix_norm self.word_id_mask = None self.train = False self.dynet_param_collection = exp_global.dynet_param_collection self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader) param_init = param_init or exp_global.param_init self.embeddings = self.dynet_param_collection.param_col\ .add_lookup_parameters((self.vocab_size, self.emb_dim), init=param_init.initializer((self.vocab_size, self.emb_dim), is_lookup=True))
def __init__(self, yaml_context, input_dim=512, layers=1, hidden_dim=None,
             residual_to_output=False, dropout=None, bidirectional=True):
  register_handler(self)
  self._final_states = None
  hidden_dim = hidden_dim or yaml_context.default_layer_dim
  if bidirectional:
    self.builder = ResidualBiRNNBuilder(yaml_context, layers, input_dim, hidden_dim,
                                        residual_to_output, dropout=dropout)
  else:
    self.builder = ResidualRNNBuilder(yaml_context, layers, input_dim, hidden_dim,
                                      residual_to_output, dropout=dropout)
def __init__(self, yaml_context, layers=1, input_dim=None, hidden_dim=None,
             downsampling_method="concat", reduce_factor=2, dropout=None):
  """
  :param layers: depth of the PyramidalRNN
  :param input_dim: size of the inputs
  :param hidden_dim: size of the outputs (and intermediate layer representations)
  :param downsampling_method: how to perform downsampling (concat|skip)
  :param reduce_factor: integer, or list of ints (a different reduction for each layer)
  """
  register_handler(self)
  hidden_dim = hidden_dim or yaml_context.default_layer_dim
  input_dim = input_dim or yaml_context.default_layer_dim
  self.dropout = dropout or yaml_context.dropout
  assert layers > 0
  assert hidden_dim % 2 == 0
  assert isinstance(reduce_factor, int) or \
         (isinstance(reduce_factor, list) and len(reduce_factor) == layers - 1)
  assert downsampling_method in ["concat", "skip"]
  self.builder_layers = []
  self.downsampling_method = downsampling_method
  self.reduce_factor = reduce_factor
  self.input_dim = input_dim
  f = UniLSTMSeqTransducer(yaml_context, input_dim, hidden_dim // 2, dropout=dropout)
  b = UniLSTMSeqTransducer(yaml_context, input_dim, hidden_dim // 2, dropout=dropout)
  self.builder_layers.append((f, b))
  for _ in range(layers - 1):
    layer_input_dim = hidden_dim if downsampling_method == "skip" else hidden_dim * reduce_factor
    f = UniLSTMSeqTransducer(yaml_context, layer_input_dim, hidden_dim // 2, dropout=dropout)
    b = UniLSTMSeqTransducer(yaml_context, layer_input_dim, hidden_dim // 2, dropout=dropout)
    self.builder_layers.append((f, b))
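# Illustrative sketch (an assumption about the downsampling step, not the class's
# actual transduce): "concat" shortens the sequence by concatenating reduce_factor
# consecutive states, while "skip" keeps every reduce_factor-th state. On plain
# Python lists of feature lists:
def downsample(states, reduce_factor, method="concat"):
  if method == "skip":
    return states[::reduce_factor]
  # "concat": group consecutive states; assumes len(states) % reduce_factor == 0
  return [sum(states[i:i + reduce_factor], [])  # concatenate the grouped feature lists
          for i in range(0, len(states), reduce_factor)]

# e.g. downsample([[1], [2], [3], [4]], 2) == [[1, 2], [3, 4]]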
def __init__(self, exp_global=Ref(Path("exp_global")), input_dim=512, layers=1, hidden_dim=None, residual_to_output=False, dropout=None, bidirectional=True): register_handler(self) self._final_states = None hidden_dim = hidden_dim or exp_global.default_layer_dim if bidirectional: self.builder = ResidualBiRNNBuilder(num_layers=layers, input_dim=input_dim, hidden_dim=hidden_dim, add_to_output=residual_to_output, exp_global=exp_global, dropout=dropout) else: self.builder = ResidualRNNBuilder(exp_global=exp_global, num_layers=layers, input_dim=input_dim, hidden_dim=hidden_dim, add_to_output=residual_to_output, dropout=dropout)
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None, lstm_dim=None, mlp_hidden_dim=None, trg_embed_dim=None, dropout=None, rnn_spec="lstm", residual_to_output=False, input_feeding=True, bridge=bare(CopyBridge), label_smoothing=0.0, vocab_projector=None, vocab_size=None, vocab=None, trg_reader=Ref(path=Path("model.trg_reader"), required=False)): register_handler(self) self.param_col = exp_global.dynet_param_collection.param_col # Define dim lstm_dim = lstm_dim or exp_global.default_layer_dim self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim input_dim = input_dim or exp_global.default_layer_dim self.input_dim = input_dim self.label_smoothing = label_smoothing # Input feeding self.input_feeding = input_feeding self.lstm_dim = lstm_dim lstm_input = trg_embed_dim if input_feeding: lstm_input += input_dim # Bridge self.lstm_layers = layers self.bridge = bridge # LSTM self.fwd_lstm = RnnDecoder.rnn_from_spec( spec=rnn_spec, num_layers=layers, input_dim=lstm_input, hidden_dim=lstm_dim, model=self.param_col, residual_to_output=residual_to_output) # MLP self.context_projector = xnmt.linear.Linear(input_dim=input_dim + lstm_dim, output_dim=mlp_hidden_dim, model=self.param_col) self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader) self.vocab_projector = vocab_projector or xnmt.linear.Linear( input_dim=self.mlp_hidden_dim, output_dim=self.vocab_size, model=self.param_col) # Dropout self.dropout = dropout or exp_global.dropout
def __init__(self, sequence=None):
  assert sequence is not None
  assert type(sequence) == list, "DefinedSequence needs a list as its sequence argument"
  assert len(sequence) > 0, "DefinedSequence needs a non-empty list"
  register_handler(self)
  self.sequence = sequence
  self.epoch_num = 0
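# Illustrative sketch (an assumption about how the sequence is consumed, since the
# constructor only stores it): a defined sequence would typically return the entry
# for the current epoch, clamping at the last entry once training runs longer than
# the sequence:
def value_for_epoch(sequence, epoch_num):
  return sequence[min(epoch_num, len(sequence) - 1)]

# e.g. value_for_epoch([0.5, 0.3, 0.1], 7) == 0.1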
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=512, h=1, dropout=0.0, attn_dropout=False, layer_norm=False, **kwargs): register_handler(self) dy_model = exp_global.dynet_param_collection.param_col input_dim = input_dim or exp_global.default_layer_dim self.layer_names = [] for i in range(1, layers + 1): name = 'l{}'.format(i) layer = EncoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm) self.layer_names.append((name, layer)) self.dropout_val = dropout or exp_global.dropout
def __init__(self, yaml_context, input_dim=None, category_dim=None, embed_dim=None):
  register_handler(self)
  model = yaml_context.dynet_param_collection.param_col
  self.category_output = xnmt.linear.Linear(input_dim, category_dim, model)
  self.category_embedder = xnmt.embedder.SimpleWordEmbedder(category_dim, embed_dim)
  self.train = True
def __init__(self, initial=0.1, warmup=0, grow=1, min_value=0.0, max_value=1.0):
  register_handler(self)
  self.value = initial
  self.warmup = warmup
  self.grow = grow
  self.min_value = min_value
  self.max_value = max_value
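# Illustrative sketch (an assumption about the update rule; the constructor only
# stores the hyperparameters): after a warmup period, a multiplicatively growing
# scalar clipped to [min_value, max_value] would evolve per epoch as:
def grown_value(initial, grow, warmup, min_value, max_value, epoch_num):
  if epoch_num <= warmup:
    return initial
  value = initial * (grow ** (epoch_num - warmup))
  return max(min_value, min(value, max_value))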
def __init__(self, exp_global=Ref(Path("exp_global")), emb_dim=None, weight_noise=None, word_dropout=0.0, fix_norm=None, vocab_size=None, vocab=None, yaml_path=None, src_reader=Ref(path=Path("model.src_reader"), required=False), trg_reader=Ref(path=Path("model.trg_reader"), required=False)): register_handler(self) self.fix_norm = fix_norm self.weight_noise = weight_noise or exp_global.weight_noise self.word_dropout = word_dropout self.emb_dim = emb_dim or exp_global.default_layer_dim self.dynet_param_collection = exp_global.dynet_param_collection self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader) self.embeddings = self.dynet_param_collection.param_col.add_parameters((self.vocab_size, self.emb_dim)) self.bias = self.dynet_param_collection.param_col.add_parameters((self.vocab_size))
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None, hidden_dim=None, dropout=None, weightnoise_std=None): register_handler(self) self.num_layers = layers input_dim = input_dim or exp_global.default_layer_dim hidden_dim = hidden_dim or exp_global.default_layer_dim self.hidden_dim = hidden_dim self.dropout_rate = dropout or exp_global.dropout self.weightnoise_std = weightnoise_std or exp_global.weight_noise assert hidden_dim % 2 == 0 self.forward_layers = [UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim/2, dropout=dropout, weightnoise_std=weightnoise_std)] self.backward_layers = [UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim/2, dropout=dropout, weightnoise_std=weightnoise_std)] self.forward_layers += [UniLSTMSeqTransducer(exp_global=exp_global, input_dim=hidden_dim, hidden_dim=hidden_dim/2, dropout=dropout, weightnoise_std=weightnoise_std) for _ in range(layers-1)] self.backward_layers += [UniLSTMSeqTransducer(exp_global=exp_global, input_dim=hidden_dim, hidden_dim=hidden_dim/2, dropout=dropout, weightnoise_std=weightnoise_std) for _ in range(layers-1)]
def __init__(self, initial=0.1, warmup=0, ratio=1, min_value=0.0, max_value=1.0):
  register_handler(self)
  self.__value = initial
  self.warmup = warmup
  self.ratio = ratio
  self.min_value = min_value
  self.max_value = max_value
  self.epoch_num = 0
def __init__(self, yaml_context, vocab_size, emb_dim=None, weight_noise=None,
             word_dropout=0.0, fix_norm=None):
  """
  :param yaml_context: the global configuration context
  :param vocab_size: size of the vocabulary
  :param emb_dim: embedding dimension; defaults to the globally configured layer dim
  """
  register_handler(self)
  self.vocab_size = vocab_size
  self.fix_norm = fix_norm
  self.weight_noise = weight_noise or yaml_context.weight_noise
  self.word_dropout = word_dropout
  self.emb_dim = emb_dim or yaml_context.default_layer_dim
  self.embeddings = yaml_context.dynet_param_collection.param_col.add_parameters(
      (self.vocab_size, self.emb_dim))
  self.bias = yaml_context.dynet_param_collection.param_col.add_parameters((self.vocab_size,))
def __init__(self, src_embedder, encoder, attender, trg_embedder, decoder):
  '''Constructor.

  :param src_embedder: A word embedder for the input language
  :param encoder: An encoder to generate encoded inputs
  :param attender: An attention module
  :param trg_embedder: A word embedder for the output language
  :param decoder: A decoder
  '''
  register_handler(self)
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.attender = attender
  self.trg_embedder = trg_embedder
  self.decoder = decoder
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=512, h=1, dropout=0.0, attn_dropout=False, layer_norm=False, vocab_size = None, vocab = None, trg_reader = Ref(path=Path("model.trg_reader"))): register_handler(self) dy_model = exp_global.dynet_param_collection.param_col input_dim = input_dim or exp_global.default_layer_dim self.layer_names = [] for i in range(1, layers + 1): name = 'l{}'.format(i) layer = DecoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm) self.layer_names.append((name, layer)) self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader) self.output_affine = LinearSent(dy_model, input_dim, self.vocab_size) self.dropout_val = dropout or exp_global.dropout
def __init__(self, yaml_context, vocab_size, emb_dim=None, weight_noise=None,
             word_dropout=0.0, fix_norm=None):
  """
  :param vocab_size: size of the vocabulary
  :param emb_dim: embedding dimension
  :param weight_noise: apply Gaussian noise with the given standard deviation to embeddings
  :param word_dropout: drop out word types with a certain probability, sampling word types
         on a per-sentence level, see https://arxiv.org/abs/1512.05287
  :param fix_norm: fix the norm of word vectors to be radius r, see
         https://arxiv.org/abs/1710.01329
  """
  register_handler(self)
  self.vocab_size = vocab_size
  self.emb_dim = emb_dim or yaml_context.default_layer_dim
  self.weight_noise = weight_noise or yaml_context.weight_noise
  self.word_dropout = word_dropout
  self.fix_norm = fix_norm
  self.embeddings = yaml_context.dynet_param_collection.param_col.add_lookup_parameters(
      (self.vocab_size, self.emb_dim))
  self.word_id_mask = None
  self.train = False
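# Illustrative sketch (an assumption about how word_dropout is realized; the
# constructor only stores the rate and a word_id_mask slot): word-type dropout
# samples, once per sentence, the set of word types whose embeddings are zeroed
# for that sentence:
import random

def sample_word_id_mask(word_ids, word_dropout):
  """Return the set of word types to drop for this sentence."""
  return {wid for wid in set(word_ids) if random.random() < word_dropout}

# During lookup, an embedding would then be replaced by a zero vector whenever
# its word id is in the sampled mask.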
def __init__(self, src_reader, src_embedder, encoder, trg_reader, trg_embedder, decoder,
             inference=None, input_dim=512):
  '''Constructor.

  :param src_reader: A reader for the source side.
  :param src_embedder: A word embedder for the input language
  :param encoder: An encoder to generate encoded inputs
  :param trg_reader: A reader for the target side.
  :param trg_embedder: A word embedder for the output language
  :param decoder: A decoder
  :param inference: The default inference strategy used for this model
  '''
  register_handler(self)
  self.src_reader = src_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.trg_reader = trg_reader
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.input_dim = input_dim
  self.inference = inference
  self.scale_emb = self.input_dim ** 0.5
  self.max_input_len = 500  # TODO: parametrize this
  self.initialize_position_encoding(self.max_input_len, input_dim)
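# Illustrative sketch (an assumption: initialize_position_encoding builds the
# sinusoidal table of Vaswani et al. 2017; the class's actual construction may
# differ). A max_input_len x input_dim table via numpy, assuming an even dim:
import numpy as np

def sinusoidal_position_encoding(max_len, dim):
  positions = np.arange(max_len)[:, None]                   # (max_len, 1)
  div_term = np.power(10000.0, np.arange(0, dim, 2) / dim)  # (dim // 2,)
  table = np.zeros((max_len, dim))
  table[:, 0::2] = np.sin(positions / div_term)             # even columns
  table[:, 1::2] = np.cos(positions / div_term)             # odd columns
  return table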
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None, hidden_dim=None, downsampling_method="concat", reduce_factor=2, dropout=None): register_handler(self) hidden_dim = hidden_dim or exp_global.default_layer_dim input_dim = input_dim or exp_global.default_layer_dim self.dropout = dropout or exp_global.dropout assert layers > 0 assert hidden_dim % 2 == 0 assert type(reduce_factor) == int or (type(reduce_factor) == list and len(reduce_factor) == layers - 1) assert downsampling_method in ["concat", "skip"] self.builder_layers = [] self.downsampling_method = downsampling_method self.reduce_factor = reduce_factor self.input_dim = input_dim f = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim / 2, dropout=dropout) b = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim / 2, dropout=dropout) self.builder_layers.append((f, b)) for _ in range(layers - 1): layer_input_dim = hidden_dim if downsampling_method == "skip" else hidden_dim * reduce_factor f = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=layer_input_dim, hidden_dim=hidden_dim / 2, dropout=dropout) b = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=layer_input_dim, hidden_dim=hidden_dim / 2, dropout=dropout) self.builder_layers.append((f, b))
def __init__(self, yaml_context, embed_encoder=None, segment_transducer=None,
             learn_segmentation=True, reinforcement_param=None, length_prior=3.5,
             learn_delete=False, length_prior_alpha=1.0, use_baseline=True,
             segmentation_warmup_counter=None):
  register_handler(self)
  model = yaml_context.dynet_param_collection.param_col
  # The embed encoder transduces the embedding vectors to a sequence of vectors
  self.embed_encoder = embed_encoder
  # The segment transducer predicts a category based on the collected vector
  self.segment_transducer = segment_transducer
  # The segment transform decides whether to segment or not
  self.segment_transform = linear.Linear(input_dim=embed_encoder.hidden_dim,
                                         output_dim=3 if learn_delete else 2, model=model)
  # The baseline linear regression model
  self.baseline = linear.Linear(input_dim=embed_encoder.hidden_dim, output_dim=1, model=model)
  self.use_baseline = use_baseline
  # Whether to learn segmentation or not
  self.learn_segmentation = learn_segmentation
  # Whether to learn deletion or not
  self.learn_delete = learn_delete
  # Other parameters
  self.length_prior = length_prior
  self.length_prior_alpha = length_prior_alpha
  self.lmbd = reinforcement_param
  # States of the object
  self.train = True
  self.warmup_counter = 0
  self.segmentation_warmup_counter = segmentation_warmup_counter
def __init__(self, src_reader, src_embedder, encoder, trg_reader, trg_embedder, decoder,
             inference=None, input_dim=512):
  register_handler(self)
  self.src_reader = src_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.trg_reader = trg_reader
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.input_dim = input_dim
  self.inference = inference
  self.scale_emb = self.input_dim ** 0.5
  self.max_input_len = 500  # TODO: parametrize this
  self.initialize_position_encoding(self.max_input_len, input_dim)
def __init__(self, src_reader, trg_reader, src_embedder=bare(SimpleWordEmbedder),
             encoder=bare(BiLSTMSeqTransducer), attender=bare(MlpAttender),
             trg_embedder=bare(SimpleWordEmbedder), decoder=bare(MlpSoftmaxDecoder),
             inference=bare(SimpleInference), calc_global_fertility=False,
             calc_attention_entropy=False):
  register_handler(self)
  self.src_reader = src_reader
  self.trg_reader = trg_reader
  self.src_embedder = src_embedder
  self.encoder = encoder
  self.attender = attender
  self.trg_embedder = trg_embedder
  self.decoder = decoder
  self.calc_global_fertility = calc_global_fertility
  self.calc_attention_entropy = calc_attention_entropy
  self.inference = inference
def __init__(self, exp_global=Ref(Path("exp_global")), emb_dim=None, weight_noise=None, word_dropout=0.0, fix_norm=None, init=None, vocab_size = None, vocab = None, yaml_path = None, src_reader = Ref(path=Path("model.src_reader"), required=False), trg_reader = Ref(path=Path("model.trg_reader"), required=False)): """ :param emb_dim: :param weight_noise: apply Gaussian noise with given standard deviation to embeddings :param word_dropout: drop out word types with a certain probability, sampling word types on a per-sentence level, see https://arxiv.org/abs/1512.05287 :param fix_norm: fix the norm of word vectors to be radius r, see https://arxiv.org/abs/1710.01329 """ register_handler(self) self.emb_dim = emb_dim or exp_global.default_layer_dim self.weight_noise = weight_noise or exp_global.weight_noise self.word_dropout = word_dropout self.fix_norm = fix_norm self.word_id_mask = None self.train = False self.dynet_param_collection = exp_global.dynet_param_collection self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader) if init == 'LeCunUniform': init = linear_init(self.vocab_size) self.embeddings = self.dynet_param_collection.param_col.add_lookup_parameters((self.vocab_size, self.emb_dim), init=init)
def __init__(self, yaml_context, vocab_size, layers=1, input_dim=None, lstm_dim=None,
             mlp_hidden_dim=None, trg_embed_dim=None, dropout=None, rnn_spec="lstm",
             residual_to_output=False, input_feeding=True, bridge=None, label_smoothing=0.0,
             vocab_projector=None):
  register_handler(self)
  param_col = yaml_context.dynet_param_collection.param_col
  # Define dim
  lstm_dim = lstm_dim or yaml_context.default_layer_dim
  mlp_hidden_dim = mlp_hidden_dim or yaml_context.default_layer_dim
  trg_embed_dim = trg_embed_dim or yaml_context.default_layer_dim
  input_dim = input_dim or yaml_context.default_layer_dim
  self.input_dim = input_dim
  self.label_smoothing = label_smoothing
  # Input feeding
  self.input_feeding = input_feeding
  self.lstm_dim = lstm_dim
  lstm_input = trg_embed_dim
  if input_feeding:
    lstm_input += input_dim
  # Bridge
  self.lstm_layers = layers
  self.bridge = bridge or NoBridge(yaml_context, self.lstm_layers, self.lstm_dim)
  # LSTM
  self.fwd_lstm = RnnDecoder.rnn_from_spec(spec=rnn_spec, num_layers=layers,
                                           input_dim=lstm_input, hidden_dim=lstm_dim,
                                           model=param_col,
                                           residual_to_output=residual_to_output)
  # MLP
  self.context_projector = xnmt.linear.Linear(input_dim=input_dim + lstm_dim,
                                              output_dim=mlp_hidden_dim, model=param_col)
  self.vocab_projector = vocab_projector or xnmt.linear.Linear(input_dim=mlp_hidden_dim,
                                                               output_dim=vocab_size,
                                                               model=param_col)
  # Dropout
  self.dropout = dropout or yaml_context.dropout
def __init__(self, yaml_context, input_dim=None, hidden_dim=None, dropout=None,
             weightnoise_std=None):
  register_handler(self)
  model = yaml_context.dynet_param_collection.param_col
  input_dim = input_dim or yaml_context.default_layer_dim
  hidden_dim = hidden_dim or yaml_context.default_layer_dim
  self.hidden_dim = hidden_dim
  self.dropout_rate = dropout or yaml_context.dropout
  self.weightnoise_std = weightnoise_std or yaml_context.weight_noise
  self.input_dim = input_dim
  # [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,), init=dy.ConstInitializer(0.0))
  self.dropout_mask_x = None
  self.dropout_mask_h = None
def __init__(self,
             exp_global=Ref(Path("exp_global")),
             ## COMPONENTS
             embed_encoder=None,
             segment_composer=None,
             final_transducer=None,
             ## OPTIONS
             length_prior=3.3,
             length_prior_alpha=None,  # GeometricSequence
             epsilon_greedy=None,      # GeometricSequence
             reinforce_scale=None,     # GeometricSequence
             confidence_penalty=None,  # SegmentationConfidencePenalty
             # For segmentation warmup (always use the Poisson prior)
             segmentation_warmup=0,
             ## FLAGS
             learn_delete=False,
             use_baseline=True,
             z_normalization=True,
             learn_segmentation=True,
             compose_char=False,
             log_reward=True,
             debug=False,
             print_sample=False):
  register_handler(self)
  model = exp_global.dynet_param_collection.param_col
  # Sanity check
  assert embed_encoder is not None
  assert segment_composer is not None
  assert final_transducer is not None
  # The embed encoder transduces the embedding vectors to a sequence of vectors
  self.embed_encoder = embed_encoder
  if not hasattr(embed_encoder, "hidden_dim"):
    embed_encoder_dim = exp_global.default_layer_dim
  else:
    embed_encoder_dim = embed_encoder.hidden_dim
  # The segment composer produces word embeddings from sequences of character embeddings
  self.segment_composer = segment_composer
  # The final transducer
  self.final_transducer = final_transducer
  # Decision layer of segmentation
  self.segment_transform = linear.Linear(input_dim=embed_encoder_dim,
                                         output_dim=3 if learn_delete else 2, model=model)
  # The baseline linear regression model
  self.baseline = linear.Linear(input_dim=embed_encoder_dim, output_dim=1, model=model)
  # Flags
  self.use_baseline = use_baseline
  self.learn_segmentation = learn_segmentation
  self.learn_delete = learn_delete
  self.z_normalization = z_normalization
  self.debug = debug
  self.compose_char = compose_char
  self.print_sample = print_sample
  self.log_reward = log_reward
  # Fixed parameters
  self.length_prior = length_prior
  self.segmentation_warmup = segmentation_warmup
  # Variable parameters
  self.length_prior_alpha = length_prior_alpha
  self.lmbd = reinforce_scale
  self.eps = epsilon_greedy
  self.confidence_penalty = confidence_penalty
  # States of the object
  self.train = False
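# Illustrative sketch (an assumption, not the class's actual loss; reward and
# log-probability computation are elided): with use_baseline=True, the
# segmentation policy would typically be trained by REINFORCE with a learned
# per-step baseline:
import dynet as dy

def reinforce_loss(log_probs, reward, baselines):
  """log_probs/baselines: per-step scalar expressions; reward: scalar expression."""
  policy_loss, baseline_loss = [], []
  for log_p, b in zip(log_probs, baselines):
    advantage = reward - b
    # score-function estimator; the advantage is treated as a constant
    policy_loss.append(-dy.nobackprop(advantage) * log_p)
    # regress the baseline toward the (detached) observed reward
    baseline_loss.append(dy.squared_distance(b, dy.nobackprop(reward)))
  return dy.esum(policy_loss) + dy.esum(baseline_loss)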