示例#1
0
def generate_lstm(net, step, lstm_params, lstm_out, dropout_ratio):
    """Add one unrolled LSTM timestep (concat -> LSTM unit -> dropout) to `net`.

    The blob "lstm_input" is concatenated with the previous hidden state,
    the result is fed to an LstmUnit together with the previous memory
    cell, and dropout is applied to the new hidden state.

    Args:
        net: network object; every layer is run through ``net.f``.
        step: timestep index, used to build the per-step layer/blob names
            ("concat%d", "lstm%d", "lstm_hidden%d", "lstm_mem%d",
            "dropout%d").
        lstm_params: sequence whose item 0 is the number of LSTM cells and
            whose item 1 is the weight filler.
        lstm_out: sequence whose item 0 is the name of the previous hidden
            blob and whose item 1 is the name of the previous memory blob.
        dropout_ratio: ratio handed to the Dropout layer.
    """
    hidden_bottom = lstm_out[0]
    mem_bottom = lstm_out[1]
    num_cells = lstm_params[0]
    filler = lstm_params[1]

    concat_name = "concat%d" % step
    hidden_top = "lstm_hidden%d" % step
    net.f(Concat(concat_name, bottoms=["lstm_input", hidden_bottom]))

    def make_unit(**extra):
        # Fresh list objects are built on every call so that a failed first
        # attempt cannot leak state into the retry.
        return LstmUnit("lstm%d" % step, num_cells,
                        weight_filler=filler,
                        param_names=["input_value", "input_gate",
                                     "forget_gate", "output_gate"],
                        bottoms=[concat_name, mem_bottom],
                        tops=[hidden_top, "lstm_mem%d" % step],
                        **extra)

    try:
        lstm_unit = make_unit(tie_output_forget=True)
    except Exception:
        # Fallback kept from the original code, whose comment states that old
        # versions of Apollocaffe set tie_output_forget=True by default, so
        # the unit is rebuilt without the keyword. `Exception` (instead of a
        # bare `except`) lets KeyboardInterrupt/SystemExit propagate.
        lstm_unit = make_unit()
    net.f(lstm_unit)
    net.f(Dropout("dropout%d" % step, dropout_ratio, bottoms=[hidden_top]))
示例#2
0
def generate_number_layers(net, step, filler, max_len):
    """Build the per-timestep "number" branch and its final inner product.

    For every timestep t in ``range(max_len)`` a 4-output InnerProduct
    "ip_number_I<t>" is run on "dropout<t>", followed by a ReLU that writes
    back to the same blob name. The per-step blobs are concatenated along
    dimension 1, passed through a ReverseTranspose layer and a 0.5 dropout,
    and fed to the 40-output InnerProduct "ip_number".

    Note: `step` is part of the signature but its incoming value is never
    used; all ``max_len`` timesteps are always generated.
    """
    per_step_blobs = []
    for t in range(max_len):
        ip_name = "ip_number_I%d" % t
        net.f(InnerProduct(ip_name, 4,
                           bottoms=["dropout%d" % t], output_4d=True,
                           weight_filler=filler,
                           param_lr_mults=[10, 20],
                           param_decay_mults=[10, 20]))
        # The ReLU's top has the same name as its bottom.
        net.f(ReLU("relu_number_I%d" % t, bottoms=[ip_name], tops=[ip_name]))
        per_step_blobs.append(ip_name)

    net.f(Concat("number_concat", bottoms=per_step_blobs, concat_dim=1))
    net.f("""
        name: "number_rtrans"
        type: "ReverseTranspose"
        bottom: "number_concat"
        top: "number_rtrans"
          """)
    net.f(Dropout("number_dropout", 0.5, bottoms=["number_rtrans"]))
    net.f(InnerProduct("ip_number", 40,
                       bottoms=["number_dropout"], output_4d=False,
                       param_lr_mults=[10, 20],
                       param_decay_mults=[10, 20],
                       weight_filler=filler))
示例#3
0
def forward(net, sentence_batches):
    """Run one training forward pass of the unrolled LSTM language model.

    The next batch is pulled from `sentence_batches`, padded with
    `pad_batch`, and unrolled for at most 100 timesteps. At step 0 the
    network is fed an all-zero word and the "lstm_seed" blob as both hidden
    and memory state; at step t > 0 it is fed column t - 1 of the padded
    batch and the state produced by step t - 1. Column t is the label of
    step t, scored by a SoftmaxWithLoss layer that ignores `zero_symbol`.

    The same `param_names` are passed at every step for the word-vector,
    LSTM and inner-product layers.

    Relies on the module-level names `pad_batch`, `batch_size`, `dimension`,
    `vocab_size` and `zero_symbol`.

    Args:
        net: network object exposing ``clear_forward()`` and ``f(layer)``.
        sentence_batches: iterator yielding batches accepted by `pad_batch`.
    """
    net.clear_forward()
    padded = pad_batch(next(sentence_batches))
    length = min(padded.shape[1], 100)
    assert length > 0

    net.f(NumpyData('lstm_seed', np.zeros((batch_size, dimension))))
    for t in range(length):
        if t == 0:
            hidden_in = mem_in = 'lstm_seed'
            token_ids = np.zeros(padded[:, 0].shape)
        else:
            hidden_in = 'lstm%d_hidden' % (t - 1)
            mem_in = 'lstm%d_mem' % (t - 1)
            token_ids = padded[:, t - 1]

        # Blob names of this timestep, computed once and reused below.
        word_blob = 'word%d' % t
        wordvec_blob = 'wordvec%d' % t
        concat_blob = 'lstm_concat%d' % t
        hidden_out = 'lstm%d_hidden' % t
        dropout_blob = 'dropout%d' % t
        label_blob = 'label%d' % t
        ip_blob = 'ip%d' % t

        net.f(NumpyData(word_blob, token_ids))
        net.f(Wordvec(wordvec_blob, dimension, vocab_size,
                      bottoms=[word_blob],
                      param_names=['wordvec_param']))
        net.f(Concat(concat_blob, bottoms=[hidden_in, wordvec_blob]))
        net.f(LstmUnit('lstm%d' % t,
                       bottoms=[concat_blob, mem_in],
                       param_names=['lstm_input_value', 'lstm_input_gate',
                                    'lstm_forget_gate', 'lstm_output_gate'],
                       tops=[hidden_out, 'lstm%d_mem' % t],
                       num_cells=dimension))
        net.f(Dropout(dropout_blob, 0.16, bottoms=[hidden_out]))

        net.f(NumpyData(label_blob, padded[:, t]))
        net.f(InnerProduct(ip_blob, vocab_size,
                           bottoms=[dropout_blob],
                           param_names=['softmax_ip_weights',
                                        'softmax_ip_bias']))
        net.f(SoftmaxWithLoss('softmax_loss%d' % t,
                              ignore_label=zero_symbol,
                              bottoms=[ip_blob, label_blob]))
示例#4
0
def eval_forward(net):
    """Sample 150 symbols from the model, one step at a time, and print them.

    The recurrent state is carried between steps through the blobs
    "lstm_hidden_prev" and "lstm_mem_prev", which are overwritten with the
    contents of "lstm_hidden_next" / "lstm_mem_next" at the end of every
    step. The first symbol is always ``ord('.')``; every later symbol is
    drawn by `softmax_choice` from the "softmax" blob left by the previous
    step. The "ip" blob is multiplied by `i_temperature` before the softmax.

    Relies on the module-level names `dimension`, `vocab_size`,
    `i_temperature` and `softmax_choice`.

    Args:
        net: network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.

    Returns:
        None. The sampled symbols are converted with ``chr`` and printed as
        a single string.
    """
    net.clear_forward()
    output_words = []
    net.f(NumpyData('lstm_hidden_prev', np.zeros((1, dimension))))
    net.f(NumpyData('lstm_mem_prev', np.zeros((1, dimension))))
    length = 150
    for step in range(length):
        net.clear_forward()
        net.f(NumpyData('word', [0]))
        if step == 0:
            output = ord('.')
        else:
            # Reads the distribution produced by the previous iteration.
            output = softmax_choice(net.blobs['softmax'].data)
        output_words.append(output)
        # Overwrite the placeholder word with the symbol chosen above.
        net.blobs['word'].data[0] = output
        net.f(
            Wordvec('wordvec',
                    dimension,
                    vocab_size,
                    bottoms=['word'],
                    param_names=['wordvec_param']))
        net.f(Concat('lstm_concat', bottoms=['lstm_hidden_prev', 'wordvec']))
        net.f(
            LstmUnit('lstm',
                     dimension,
                     bottoms=['lstm_concat', 'lstm_mem_prev'],
                     param_names=[
                         'lstm_input_value', 'lstm_input_gate',
                         'lstm_forget_gate', 'lstm_output_gate'
                     ],
                     tops=['lstm_hidden_next', 'lstm_mem_next']))
        net.f(Dropout('dropout', 0.16, bottoms=['lstm_hidden_next']))

        net.f(
            InnerProduct('ip',
                         vocab_size,
                         bottoms=['dropout'],
                         param_names=['softmax_ip_weights',
                                      'softmax_ip_bias']))
        # Scale the "ip" blob in place before it is fed to the softmax.
        net.blobs['ip'].data[:] *= i_temperature
        net.f(Softmax('softmax', bottoms=['ip']))
        # Carry the new state over to the next iteration.
        net.blobs['lstm_hidden_prev'].data_tensor.copy_from(
            net.blobs['lstm_hidden_next'].data_tensor)
        net.blobs['lstm_mem_prev'].data_tensor.copy_from(
            net.blobs['lstm_mem_next'].data_tensor)
    # Parenthesised single-argument form: prints the same on Python 2 and
    # is valid syntax on Python 3.
    print(''.join([chr(x) for x in output_words]))
示例#5
0
def generate_losses(net, filler, net_config):
    """Append the "numbers" classification head and its softmax loss.

    A 0.5 dropout is applied to "bbox_concat", a 40-output InnerProduct
    "ip_numbers" follows, and a SoftmaxWithLoss layer compares it with the
    "numbers" blob. The loss weight is taken from
    ``net_config["hungarian_loss_weight"]``.

    Args:
        net: network object; every layer is run through ``net.f``.
        filler: weight filler for the inner product layer.
        net_config: mapping that provides "hungarian_loss_weight".
    """
    dropout_layer = Dropout("numbers_dropout", 0.5, bottoms=["bbox_concat"])
    net.f(dropout_layer)

    ip_layer = InnerProduct("ip_numbers", 40,
                            bottoms=["numbers_dropout"], output_4d=False,
                            param_lr_mults=[10, 20],
                            param_decay_mults=[10, 20],
                            weight_filler=filler)
    net.f(ip_layer)

    # NOTE: the layer/blob name "nunbersloss" (sic) is a runtime identifier
    # and is therefore kept exactly as it was.
    loss_weight = net_config["hungarian_loss_weight"]
    loss_spec = """
          name: "nunbersloss"
          type: "SoftmaxWithLoss"
          bottom: "ip_numbers"
          bottom: "numbers"
          top: "nunbersloss"
          loss_weight: %s
              """ % loss_weight
    net.f(loss_spec)
示例#6
0
def generate_lstm(net, step, num_cells, hidden_bottom, mem_bottom, filler,
                  dropout_ratio):
    """Add one unrolled LSTM timestep (concat -> LSTM unit -> dropout).

    "lstm_input" is concatenated with `hidden_bottom`, the result and
    `mem_bottom` are fed to an LstmUnit with `num_cells` cells, and dropout
    with `dropout_ratio` is applied to the resulting hidden state. All
    layer and blob names carry `step` as a suffix.
    """
    concat_top = "concat%d" % step
    hidden_top = "lstm_hidden%d" % step
    mem_top = "lstm_mem%d" % step
    gate_names = ["input_value", "input_gate", "forget_gate", "output_gate"]

    concat_layer = Concat(concat_top, bottoms=["lstm_input", hidden_bottom])
    net.f(concat_layer)

    lstm_layer = LstmUnit("lstm%d" % step,
                          num_cells,
                          weight_filler=filler,
                          param_names=gate_names,
                          bottoms=[concat_top, mem_bottom],
                          tops=[hidden_top, mem_top])
    net.f(lstm_layer)

    dropout_layer = Dropout("dropout%d" % step,
                            dropout_ratio,
                            bottoms=[hidden_top])
    net.f(dropout_layer)
示例#7
0
def forward(net, input_data, net_config, deploy=False):
    """Build and run the unrolled, stacked LSTM word model on one batch.

    "wordvec_layer" is sliced along dimension 2 into one input per timestep
    and "target_words" along dimension 1 into one label per timestep. For
    each timestep every LSTM stack concatenates its input (the word vector
    for stack 0, the dropout output of the stack below otherwise) with its
    own previous hidden state, runs an LstmUnit and applies dropout. The
    top stack's dropout outputs of all timesteps are concatenated along
    dimension 0 and projected to ``net_config['vocab_size']`` outputs.

    Args:
        net: network object exposing ``clear_forward()`` and ``f(layer)``.
        input_data: mapping with "wordvec_layer" and "target_words".
        net_config: mapping with "max_len", "init_range", "batch_size",
            "lstm_num_cells", "lstm_num_stacks", "dropout_ratio",
            "vocab_size" and (when not deploying) "zero_symbol".
        deploy: if true, finish with a Softmax layer "word_probs";
            otherwise finish with the SoftmaxWithLoss layer "word_loss".
    """
    net.clear_forward()

    wordvec_data = np.array(input_data["wordvec_layer"])
    net.f(NumpyData("wordvec_layer", data=wordvec_data))  # 128*38*100*1
    target_data = np.array(input_data["target_words"])
    net.f(NumpyData("target_words", data=target_data))  # 128*100*1*1

    max_len = net_config['max_len']

    # Cutting after every index 1..max_len-1 yields one slice per timestep.
    net.f(
        Slice("label_slice_layer",
              slice_dim=1,
              bottoms=["target_words"],
              tops=['label%d' % t for t in range(max_len)],
              slice_point=list(range(1, max_len))))
    net.f(
        Slice("wordvec_slice_layer",
              slice_dim=2,
              bottoms=['wordvec_layer'],
              tops=['target_wordvec%d_4d' % t for t in range(max_len)],
              slice_point=list(range(1, max_len))))

    # Raw prototxt for a Reshape with shape (0, -1): keep the first
    # dimension and flatten the rest.
    reshape_spec = """
            name: "target_wordvec%d"
            type: "Reshape"
            bottom: "target_wordvec%d_4d"
            top: "target_wordvec%d"
            reshape_param {
              shape {
                dim: 0  # copy the dimension from below
                dim: -1
              }
            }
            """
    for t in range(max_len):  # 128*38*1*1 -> 128*38
        net.f(reshape_spec % (t, t, t))

    filler = Filler("uniform", net_config["init_range"])
    for t in range(max_len):
        if t == 0:
            # Zero initial hidden state and memory cell, used by every stack
            # at the first timestep.
            for seed_name in ("dummy_layer", "dummy_mem_cell"):
                seed = np.zeros((net_config["batch_size"],
                                 net_config["lstm_num_cells"]))
                net.f(NumpyData(seed_name, seed))

        for layer in range(net_config['lstm_num_stacks']):
            if layer == 0:
                layer_input = 'target_wordvec%d' % t
            else:
                layer_input = 'dropout%d_%d' % (layer - 1, t)

            if t == 0:
                prev_hidden = "dummy_layer"
                prev_mem = 'dummy_mem_cell'
            else:
                prev_hidden = 'lstm%d_hidden%d' % (layer, t - 1)
                prev_mem = 'lstm%d_mem_cell%d' % (layer, t - 1)

            concat_name = 'concat%d_layer%d' % (layer, t)
            net.f(Concat(concat_name, bottoms=[layer_input, prev_hidden]))

            hidden_top = 'lstm%d_hidden%d' % (layer, t)
            mem_top = 'lstm%d_mem_cell%d' % (layer, t)
            # Parameter names depend on the stack only, not on the timestep.
            gate_params = ['lstm%d_param_%d' % (layer, k) for k in range(4)]
            net.f(
                LstmUnit('lstm%d_layer%d' % (layer, t),
                         net_config["lstm_num_cells"],
                         weight_filler=filler,
                         param_names=gate_params,
                         bottoms=[concat_name, prev_mem],
                         tops=[hidden_top, mem_top]))
            net.f(
                Dropout('dropout%d_%d' % (layer, t),
                        net_config["dropout_ratio"],
                        bottoms=[hidden_top]))

    top_outputs = [
        'dropout%d_%d' % (net_config['lstm_num_stacks'] - 1, t)
        for t in range(max_len)
    ]
    net.f(Concat('hidden_concat', bottoms=top_outputs, concat_dim=0))
    net.f(
        InnerProduct("inner_product",
                     net_config['vocab_size'],
                     bottoms=["hidden_concat"],
                     weight_filler=filler))

    label_blobs = ['label%d' % t for t in range(max_len)]
    net.f(Concat('label_concat', bottoms=label_blobs, concat_dim=0))

    if deploy:
        net.f(Softmax("word_probs", bottoms=["inner_product"]))
        return
    net.f(
        SoftmaxWithLoss("word_loss",
                        bottoms=["inner_product", "label_concat"],
                        ignore_label=net_config['zero_symbol']))
示例#8
0
def forward(net, input_data, net_config, phase='train', deploy=False):
    """Run the unrolled, stacked LSTM word model, feeding inputs step by step.

    The input of each timestep is pushed into the net as its own NumpyData
    blob ("target_wordvec<i>"). In the 'test' phase, the input of step i + 1
    is partly replaced by a one-hot vector of the model's own argmax
    prediction at step i (see the inline comments for the exact condition).

    Args:
        net: network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.
        input_data: mapping with "ws_i" (indexed per batch row),
            "wordvec_layer" (indexed as ``[:, :, step, 0]``) and
            "target_words".
        net_config: mapping with "max_len", "batch_size", "init_range",
            "lstm_num_cells", "lstm_num_stacks", "dropout_ratio",
            "vocab_size", "zero_symbol" and, in the 'test' phase,
            "whitespace_symbol".
        phase: 'test' enables feeding predictions back as inputs; any other
            value uses the inputs from `input_data` unchanged.
        deploy: if true, a Softmax layer "word_probs" over all timesteps is
            added in addition to the loss layer.

    Returns:
        None. The SoftmaxWithLoss layer "word_loss" is always run last.
    """

    net.clear_forward()

    batch_ws_i = input_data["ws_i"]
    # Per-row exclusive upper bound for prediction feedback; starts at
    # max_len and is lowered once a row predicts the whitespace symbol.
    batch_stop_i = [net_config['max_len']] * net_config['batch_size']
    wordvec_layer = input_data["wordvec_layer"]  # 128*38*100*1
    net.f(NumpyData("target_words",
                    data=np.array(input_data["target_words"])))  # 128*100*1*1

    # Slice "target_words" along dimension 1 into one label blob per
    # timestep (slice points 1..max_len-1).
    tops = []
    slice_point = []
    for i in range(net_config['max_len']):
        tops.append('label%d' % i)
        if i != 0:
            slice_point.append(i)
    net.f(
        Slice("label_slice_layer",
              slice_dim=1,
              bottoms=["target_words"],
              tops=tops,
              slice_point=slice_point))

    # Input of the first timestep; later inputs are fed at the end of each
    # loop iteration below.
    net.f(NumpyData("target_wordvec%d" % 0,
                    data=wordvec_layer[:, :, 0, 0]))  # start symbol, 128*38

    filler = Filler("uniform", net_config["init_range"])
    for i in range(net_config['max_len']):
        if i == 0:
            # Zero initial hidden state and memory cell for the first step.
            net.f(
                NumpyData(
                    "dummy_layer",
                    np.zeros((net_config["batch_size"],
                              net_config["lstm_num_cells"]))))
            net.f(
                NumpyData(
                    "dummy_mem_cell",
                    np.zeros((net_config["batch_size"],
                              net_config["lstm_num_cells"]))))

        for j in range(net_config['lstm_num_stacks']):
            # Concat input: word vector (stack 0) or the dropout output of
            # the stack below, followed by this stack's previous hidden
            # state (the zero blob at step 0).
            bottoms = []
            if j == 0:
                bottoms.append('target_wordvec%d' % i)
            if j >= 1:
                bottoms.append('dropout%d_%d' % (j - 1, i))
            if i == 0:
                bottoms.append("dummy_layer")
            else:
                bottoms.append('lstm%d_hidden%d' % (j, i - 1))
            net.f(Concat('concat%d_layer%d' % (j, i), bottoms=bottoms))

            # Parameter names depend on the stack index only, so they are
            # identical at every timestep.
            param_names = []
            for k in range(4):
                param_names.append('lstm%d_param_%d' % (j, k))
            # LSTM input: the concat above plus the previous memory cell.
            bottoms = ['concat%d_layer%d' % (j, i)]
            if i == 0:
                bottoms.append('dummy_mem_cell')
            else:
                bottoms.append('lstm%d_mem_cell%d' % (j, i - 1))
            net.f(
                LstmUnit('lstm%d_layer%d' % (j, i),
                         net_config["lstm_num_cells"],
                         weight_filler=filler,
                         param_names=param_names,
                         bottoms=bottoms,
                         tops=[
                             'lstm%d_hidden%d' % (j, i),
                             'lstm%d_mem_cell%d' % (j, i)
                         ]))

            net.f(
                Dropout('dropout%d_%d' % (j, i),
                        net_config["dropout_ratio"],
                        bottoms=['lstm%d_hidden%d' % (j, i)]))

        # Per-step projection of the top stack's dropout output.
        net.f(
            InnerProduct("ip%d" % i,
                         net_config['vocab_size'],
                         bottoms=[
                             'dropout%d_%d' %
                             (net_config['lstm_num_stacks'] - 1, i)
                         ],
                         weight_filler=filler))

        # Prepare and feed the input of the next timestep (none is needed
        # after the last one).
        if i < net_config['max_len'] - 1:
            # np.array() copies, so the edits below do not touch input_data.
            tar_wordvec = np.array(wordvec_layer[:, :, i + 1, 0])  # 128*38
            if phase == 'test':
                net.f(Softmax("word_probs%d" % i, bottoms=["ip%d" % i]))
                probs = net.blobs["word_probs%d" % i].data
                for bi in range(net_config['batch_size']):
                    # Only rows with ws_i[bi] <= i < stop index get their
                    # next input replaced by the model's own prediction.
                    if i >= batch_ws_i[bi] and i < batch_stop_i[bi]:
                        # One-hot vector of length vocab_size at the argmax.
                        # NOTE(review): assigning it to a row of tar_wordvec
                        # assumes the word-vector width equals vocab_size.
                        vec = [0] * net_config["vocab_size"]
                        peakIndex = np.argmax(probs[bi, :])
                        if peakIndex == net_config['whitespace_symbol']:
                            # Stop feeding predictions for this row from the
                            # next step on.
                            batch_stop_i[bi] = i + 1
                        vec[peakIndex] = 1
                        tar_wordvec[bi, :] = vec
            net.f(NumpyData("target_wordvec%d" % (i + 1), data=tar_wordvec))

    # Stack the per-step projections along dimension 0 ...
    bottoms = []
    for i in range(net_config['max_len']):
        bottoms.append("ip%d" % i)
    net.f(Concat('ip_concat', bottoms=bottoms, concat_dim=0))

    # ... and the per-step labels in the same order.
    bottoms = []
    for i in range(net_config['max_len']):
        bottoms.append('label%d' % i)
    net.f(Concat('label_concat', bottoms=bottoms, concat_dim=0))

    if deploy:
        net.f(Softmax("word_probs", bottoms=["ip_concat"]))

    # The loss layer is run regardless of `deploy`.
    net.f(
        SoftmaxWithLoss("word_loss",
                        bottoms=["ip_concat", "label_concat"],
                        ignore_label=net_config['zero_symbol']))