def prep_student(dataset, verbose, alpha, temperature):
    # if student is not saved beforehand, train and save it
    model_path = '{}/models/student/'.format(dataset)
    if not isfile(model_path +
                  'model/model.h5') or not isfile(model_path +
                                                  'model/model.json'):
        print("Student model doesn't exist, training it...")
        # load dataset and logits
        _dataset = get_data(dataset)
        x_train, y_train, x_test, y_test = _dataset.get_data()
        train_features = np.load(
            '{}/models/teacher/npy/train_logits.npy'.format(dataset))
        test_features = np.load(
            '{}/models/teacher/npy/test_logits.npy'.format(dataset))
        # normalized output with temperature shape=(num_samples,num_classes)
        y_train_soft = softmax(train_features / temperature)
        y_test_soft = softmax(test_features / temperature)
        # concatenated output labels=(num_samples,2*num_classes)
        y_train_new = np.concatenate([y_train, y_train_soft], axis=1)
        y_test_new = np.concatenate([y_test, y_test_soft], axis=1)
        # build student model
        student = get_model(_dataset, 'distillation', is_dropout=True)
        # remove softmax
        student.layers.pop()
        # get features
        logits = student.layers[-1].output
        # normal softmax output
        probs = Activation('softmax')(logits)
        # softmax output with temperature
        logits_T = Lambda(lambda x: x / temperature)(logits)
        probs_T = Activation('softmax')(logits_T)
        # concatenate
        output = concatenate([probs, probs_T])
        # This is our new student model
        student = Model(student.input, output)
        compile_model(student,
                      loss=distillation_loss(_dataset.num_classes, alpha),
                      metrics=[acc_distillation(_dataset.num_classes)])
        # create a new dataset with generated data
        dataset_s = DatasetCls(x_train,
                               y_train_new,
                               x_test,
                               y_test_new,
                               dataset_name=dataset)
        # train student
        student = train(dataset_s,
                        student,
                        PARAMS[dataset]['epochs'] * 2,
                        PARAMS[dataset]['batch_size'],
                        log_dir=model_path,
                        callbacks=[
                            early_stop(patience=PARAMS[dataset]['patience'],
                                       monitor='val_loss',
                                       verbose=verbose)
                        ],
                        verbose=verbose)
        # save output files
        save_model_outputs(student, _dataset, model_path)

        K.clear_session()
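
The custom distillation_loss and acc_distillation helpers used above aren't shown in this listing. As a rough illustration of the idea only (not the project's actual code), a numpy sketch of a loss over the concatenated [hard | soft] targets, mixed by alpha, might look like this:

import numpy as np

def softmax(z, axis=-1):
    e = np.exp(z - np.max(z, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)

def distillation_loss_np(y_true, y_pred, num_classes, alpha, eps=1e-8):
    # first num_classes columns: hard labels/probs; remainder: temperature-softened part
    y_hard, y_soft = y_true[:, :num_classes], y_true[:, num_classes:]
    p_hard, p_soft = y_pred[:, :num_classes], y_pred[:, num_classes:]
    ce_hard = -np.sum(y_hard * np.log(p_hard + eps), axis=1)
    ce_soft = -np.sum(y_soft * np.log(p_soft + eps), axis=1)
    return np.mean(alpha * ce_hard + (1.0 - alpha) * ce_soft)

# tiny smoke test with random logits and temperature T = 2
rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 3))
y_hard = np.eye(3)[[0, 1, 2, 1]]
y_true = np.concatenate([y_hard, softmax(logits / 2.0)], axis=1)
y_pred = np.concatenate([softmax(logits), softmax(logits / 2.0)], axis=1)
print(distillation_loss_np(y_true, y_pred, num_classes=3, alpha=0.1))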
Example #2
 def sample_s(self, ps):
     """
     ps = (updated) prior for s
     """
     intercept = self.clf_vera.intercept_
     w = self.clf_vera.coef_.T
     
     res = np.ones((self.n, 3))
     for claim, stance, source in self.d:
         self.f[claim, source] -= (self.s[stance] - 1)
         x = intercept + self.f[claim].dot(w) # if sparse matrix, need to take first row
         #x -= (self.s[stance] - 1) * w[source] #
         
         # res = prob of V given each value of S
         res[stance, 0] = util.softmax(x - w[source])[int(self.v[claim])]
         res[stance, 1] = util.softmax(x            )[int(self.v[claim])]
         res[stance, 2] = util.softmax(x + w[source])[int(self.v[claim])]
         
         prob = res[stance] * ps[stance]
         prob = prob / (np.sum(prob))
         
         # sample new S
         self.s[stance]= self.rs.choice(range(3), p = prob)
         
         # replace S
         self.f[claim, source] += (self.s[stance] - 1)
         
         
     return res
Example #3
def process(results, truth):
    K = len(results['struct'][0])

    probs1 = results['count'] / np.sum(results['count'])
    probs2 = util.softmax(results['llh'])
    probs3 = util.softmax(results['llh'] + np.log(results['count']))
    # Verify that the means by which we compute posterior probabilities in the
    # results files hasn't changed. (Even if it has, we don't use
    # `results['prob']` in this file, so it should be fine.)
    assert np.allclose(probs3, results['prob'])

    pard = compute_parent_dist(results['struct'], probs3)
    parentropy, _, _ = compute_parentropy(pard)

    truth_num = len(truth['struct'])
    truth_llh = np.zeros(truth_num)
    truth_probs = np.ones(truth_num) / truth_num
    truth_pard = compute_parent_dist(truth['struct'], truth_probs)
    truth_parentropy, _, _ = compute_parentropy(truth_pard)
    assert np.allclose(truth_probs, truth['prob'])

    top_probs = 10
    good_thresh = 1e-3
    jsd_parents = compute_parents_jsd(pard, truth_pard)
    jsd_parents_phi_mean = jsd_parents * np.mean(truth['phi'][0, 1:], axis=1)
    jsd_parents_phi_max = jsd_parents * np.max(truth['phi'][0, 1:], axis=1)

    stats = {}
    stats['true_trees'] = truth_num
    stats['sampled_unique_trees'] = len(probs3)
    stats['num_good'] = np.sum(probs3 >= good_thresh)
    stats['prop_good'] = '%.3f' % (np.sum(probs3 >= good_thresh) / len(probs3))
    stats['H_trees_truth'] = calc_entropy(truth_probs)
    stats['H_trees_pairtree_1'] = calc_entropy(probs1)
    stats['H_trees_pairtree_2'] = calc_entropy(probs2)
    stats['H_trees_pairtree_3'] = calc_entropy(probs3)
    stats['H_parents_truth'] = truth_parentropy
    stats['H_parents_pairtree'] = parentropy
    stats['prop_truth_recovered'], stats['jaccard'] = compute_indices(
        results['struct'], truth['struct'])
    stats['jsd_trees'] = compute_tree_jsd(results['struct'], probs3,
                                          truth['struct'], truth_probs)
    stats['jsd_parents_sum'] = np.sum(jsd_parents)
    stats['jsd_parents_mean'] = np.sum(jsd_parents) / K
    stats['jsd_parents_max'] = np.max(jsd_parents)
    stats['jsd_parents'] = jsd_parents
    stats['jsd_parents_phi_mean'] = np.max(jsd_parents_phi_mean)
    stats['jsd_parents_phi_max'] = np.max(jsd_parents_phi_max)
    stats['jsd_parents_phi_mean_top10'] = make_sorted(jsd_parents_phi_mean)
    stats['jsd_parents_phi_max_top10'] = make_sorted(jsd_parents_phi_max)
    stats['top_probs_1_top10'] = make_sorted(probs1)
    stats['top_probs_2_top10'] = make_sorted(probs2)
    stats['top_probs_3_top10'] = make_sorted(probs3)

    keys = list(stats.keys())
    vals = [stats[key] for key in keys]
    for A in (keys, vals):
        print(*A, sep=',')
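
calc_entropy, compute_parent_dist, and the other helpers are defined elsewhere in the project; as a hedged illustration only, a minimal calc_entropy over a discrete distribution could look like this (the real helper may differ, e.g. in its log base):

import numpy as np

def calc_entropy(probs, eps=1e-30):
    probs = np.asarray(probs)
    return -np.sum(probs * np.log2(probs + eps))

# e.g. a uniform distribution over 4 trees has entropy 2 bits
assert np.isclose(calc_entropy(np.ones(4) / 4), 2.0)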
Example #4
File: model.py Project: timguoqk/cloze
def inference(documents, doc_mask, query, query_mask):
    embedding = tf.get_variable('embedding',
                                [FLAGS.vocab_size, FLAGS.embedding_size],
                                initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

    regularizer = tf.nn.l2_loss(embedding)

    doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(
        embedding, documents), FLAGS.dropout_keep_prob)
    doc_emb.set_shape([None, None, FLAGS.embedding_size])

    query_emb = tf.nn.dropout(tf.nn.embedding_lookup(
        embedding, query), FLAGS.dropout_keep_prob)
    query_emb.set_shape([None, None, FLAGS.embedding_size])

    with tf.variable_scope('document', initializer=orthogonal_initializer()):
        fwd_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
        back_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)

        doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, doc_emb,
            sequence_length=tf.to_int64(doc_len),
            dtype=tf.float32)
        h_doc = tf.concat(2, h)

    with tf.variable_scope('query', initializer=orthogonal_initializer()):
        fwd_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
        back_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)

        query_len = tf.reduce_sum(query_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, query_emb,
            sequence_length=tf.to_int64(query_len),
            dtype=tf.float32)
        h_query = tf.concat(2, h)

    M = tf.batch_matmul(h_doc, h_query, adj_y=True)
    M_mask = tf.to_float(tf.batch_matmul(tf.expand_dims(
        doc_mask, -1), tf.expand_dims(query_mask, 1)))

    alpha = softmax(M, 1, M_mask)
    beta = softmax(M, 2, M_mask)

    query_importance = tf.expand_dims(tf.reduce_sum(
        beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

    s = tf.squeeze(tf.batch_matmul(alpha, query_importance), [2])

    unpacked_s = zip(tf.unpack(s, FLAGS.batch_size),
                     tf.unpack(documents, FLAGS.batch_size))
    y_hat = tf.pack([
        tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size)
        for (attentions, sentence_ids) in unpacked_s
    ])

    return y_hat, regularizer
Example #5
def main():
    weight = np.random.rand(28 * 28, 10)
    #print weight.shape
    bias = np.random.rand(10)
    #print softmax(np.arange(6).reshape(2,3).T).T
    #print bias.shape
    #print np.zeros((2,3)).reshape((6,))
    #print np.arange(6).reshape((2,3))
    #print np.arange(6).shape
    pos_cnt = 0
    total_cnt = 0
    batch_size = 2
    for _ in range(10):
        for feature, label in load_train_data_batch(batch_size):
            feature = feature/256.0
            feature = feature.reshape((batch_size, 28*28))
            pred = np.matmul(feature, weight)
            #print pred
            #print pred.shape
            #print bias.shape
            pred = pred + bias
            #print pred, pred.shape
            pred = softmax(pred.T).T
            pred_y = np.argmax(pred, axis=1)
            #print pred_y
            #print pred_y
            #print label
            pos_cnt += np.sum(np.equal(pred_y, label))
            total_cnt += label.shape[0]
    print pos_cnt, total_cnt, 1.0*pos_cnt/total_cnt
Example #6
def count_clusters(results):
  tidxs = np.array(sorted(results.tree_summary.keys()))
  llhs = np.array([results.tree_summary[tidx]['llh'] for tidx in tidxs])
  probs = util.softmax(llhs)
  clusters = np.array([len(results.tree_summary[tidx]['populations']) for tidx in tidxs]) - 1
  expected_clusters = np.sum(probs * clusters)
  return expected_clusters
Example #7
    def fit(self, sess, train, dev_data_np, dev_seqlen, dev_labels):
        losses_epochs = []
        dev_performances_epochs = []
        dev_predictions_epochs = []
        dev_predicted_classes_epochs = []

        for epoch in range(self.config.n_epochs):
            print("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
            loss = self.run_epoch(sess, train)

            # Computing predictions
            dev_predictions = self.predict_on_batch(sess, dev_data_np,
                                                    dev_seqlen)

            # Computing development performance
            dev_predictions = softmax(np.array(dev_predictions))
            dev_predicted_classes = np.argmax(dev_predictions, axis=1)
            dev_performance = get_performance(dev_predicted_classes,
                                              dev_labels,
                                              n_classes=4)

            # Adding to global outputs #MODIF
            dev_predictions_epochs.append(dev_predictions)
            dev_predicted_classes_epochs.append(dev_predicted_classes)
            dev_performances_epochs.append(dev_performance)
            losses_epochs.append(loss)

        return losses_epochs, dev_performances_epochs, dev_predicted_classes_epochs, dev_predictions_epochs
    def __forward__(self, x):

        """
        this method is an implementation of forward propagation with one sample at a time.
        
        Parameters: 
            x          :   numpy array  (contains one sample of features)
                    
        Returns:
            zs         :    list        (contains numpy arrays, each array corresponds to sum(xW+b) of the respective layer)
            activations:    list        (contains numpy arrays, each array corresponds to the output of the respective layer)
        
        """
                                                                      # demo shapes
        l0 = x.T                                                      # [1, 784]
        z1 = np.dot(l0, self.weights['l1'].T) + self.biases['l1']     # [1, 300] = [1, 784] .* [784, 300] + [1, 300]
        l1 = util.relu(z1)                                            # [1, 300]
        
        z2 = np.dot(l1, self.weights['l2'].T) + self.biases['l2']     # [1, 90]  = [1, 300] .* [300, 90] + [1, 90] 
        l2 = util.relu(z2)                                            # [1, 90]
        
        z3 = np.dot(l2, self.weights['l3'].T) + self.biases['l3']     # [1, 10]  = [1, 90] .* [90, 10] + [1, 10]
        l3 = util.softmax(z3)                                         # [1, 10]

        zs = [z1, z2, z3]
        activations = [l0, l1, l2, l3]

        return zs, activations
Example #9
    def gradient(self, x, t):
        # Fetch the network parameters from the parameter dictionary
        w1, b1 = self.params['W1'], self.params['b1']
        w2, b2 = self.params['W2'], self.params['b2']
        # Dictionary to store the gradient results
        grads = {}

        # forward
        a1 = np.dot(x, w1) + b1
        h1 = sigmoid(a1)
        a2 = np.dot(h1, w2) + b2
        output = softmax(a2)

        # backward
        dy = (output - t) / x.shape[0]
        grads['W2'] = np.dot(h1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        """
            grads['b2'] = np.sum(dy, axis=0),为什么求和?
                - 首先输出为多少维度,那么b就是多少维的向量,和样本数量无关
            因为正向传播过程中,偏置b向量会分别加到每一个样本数据上,因此只需把这些值加起来就好
            也就是说:第一个样本产生由于b产生误差 dy1
                    第二个样本产生由于b产生误差 dy2
                    ...
                    b产生的总误差为: dy1 + dy2 + ...      
        """
        da1 = np.dot(dy, w2.T)
        ha1 = sigmoid(a1)
        dz1 = (1.0 - ha1) * ha1 * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads
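
A small standalone numpy check of the point made in the docstring above (illustrative only): because the bias is broadcast onto every sample in the forward pass, its gradient is the column-wise sum of the per-sample output errors dy.

import numpy as np

rng = np.random.default_rng(0)
dy = rng.normal(size=(5, 3))   # 5 samples, 3 output units
grad_b = np.sum(dy, axis=0)    # shape (3,): one entry per output unit, independent of sample count
assert np.allclose(grad_b, dy[0] + dy[1] + dy[2] + dy[3] + dy[4])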
Example #10
def _scaled_softmax(A, R=100):
  # Ensures `max(softmax(A)) / min(softmax(A)) <= R`.
  #
  # Typically, I use this as a "softer softmax", ensuring that the largest
  # element in the softmax is at most 100x the magnitude of the smallest.
  # Otherwise, given large differences between the minimum and maximum values,
  # the softmax becomes even more sharply peaked, with one element absorbing
  # effectively all mass.
  noninf = np.logical_not(np.isinf(A))
  if np.sum(noninf) == 0:
    return util.softmax(A)
  delta = np.max(A[noninf]) - np.min(A[noninf])
  if np.isclose(0, delta):
    return util.softmax(A)
  B = min(1, np.log(R) / delta)
  return util.softmax(B*A)
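
A quick numeric check of the bound described in the comments above, using a plain numpy softmax in place of util.softmax (illustrative only):

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

A = np.array([0.0, 3.0, 12.0])
R = 100
delta = np.max(A) - np.min(A)
B = min(1, np.log(R) / delta)
p = softmax(B * A)
print(p.max() / p.min())   # <= R; exactly 100 here, since B < 1 was applied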
Example #11
    def segment(self, pic, ohl, name):
        with tf.variable_scope('Segmentation'):
            out_seg = self.segmenter.net(pic)

        weight_non_nod = tf.constant([[0.05]])
        class_weighting = tf.concat(
            [weight_non_nod,
             tf.ones(shape=[self.channels - 1, 1])], axis=0)
        location_weight = tf.tensordot(ohl, class_weighting, axes=[[3], [0]])

        raw_ce = tf.nn.softmax_cross_entropy_with_logits(labels=ohl,
                                                         logits=out_seg)
        weighted_ce = tf.multiply(tf.expand_dims(raw_ce, axis=3),
                                  location_weight)
        ce = tf.reduce_mean(weighted_ce)

        # visualization of segmentation
        seg = self.vis_seg(ohl)
        seg_net = self.vis_seg(ut.softmax(out_seg))

        # the tensorboard logging
        with tf.name_scope(name):
            self.sum_seg.append(tf.summary.image('Image', pic, max_outputs=2))
            self.sum_seg.append(
                tf.summary.image('Annotation', seg, max_outputs=2))
            self.sum_seg.append(
                tf.summary.image('Segmentation', seg_net, max_outputs=2))
            self.sum_seg.append(
                tf.summary.image('Weight_map', location_weight, max_outputs=2))
        return ce
Example #12
    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.
        This change can improve the feature representation of Capsule.
        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = util.softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            if i < self.routings - 1:
                b += K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o
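
As a minimal numpy sketch of what one routing iteration does (illustrative only, with the squash activation omitted; not the layer's actual backend code): coupling coefficients c come from a softmax over the logits b, the capsule output o is the coupling-weighted sum of the predicted vectors, and the agreement <o, u_hat> becomes the new b.

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)

num_capsule, input_num_capsule, dim_capsule = 3, 5, 4
rng = np.random.default_rng(0)
hat_inputs = rng.normal(size=(num_capsule, input_num_capsule, dim_capsule))
b = np.zeros((num_capsule, input_num_capsule))

c = softmax(b, axis=1)                       # coupling coefficients
o = np.einsum('ni,nid->nd', c, hat_inputs)   # weighted sum per output capsule
b = np.einsum('nd,nid->ni', o, hat_inputs)   # agreement replaces the logits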
Example #13
    def fit(self, sess, h_np, b_np, h_len, b_len, y, dev_h, dev_b, dev_h_len,
            dev_b_len, dev_y):
        losses_epochs = []
        dev_performances_epochs = []
        dev_predictions_epochs = []
        dev_predicted_classes_epochs = []

        for epoch in range(self.config.n_epochs):
            print('-------new epoch---------')
            loss = self.run_epoch(sess, h_np, b_np, h_len, b_len, y)

            # Computing predictions
            dev_predictions = self.predict_on_batch(sess, dev_h, dev_b,
                                                    dev_h_len, dev_b_len)

            # Computing development performance
            dev_predictions = softmax(np.array(dev_predictions))
            dev_predicted_classes = np.argmax(dev_predictions, axis=1)
            dev_performance = get_performance(dev_predicted_classes,
                                              dev_y,
                                              n_classes=4)

            # Adding to global outputs
            dev_predictions_epochs.append(dev_predictions)
            dev_predicted_classes_epochs.append(dev_predicted_classes)
            dev_performances_epochs.append(dev_performance)
            losses_epochs.append(loss)

            print('EPOCH: ', epoch, ', LOSS: ', np.mean(loss))

        return losses_epochs, dev_performances_epochs, dev_predicted_classes_epochs, dev_predictions_epochs
Example #14
def make_mutrel_from_trees_and_unique_clusterings(structs, llhs, clusterings):
    '''
  Relative to `make_mutrel_from_trees_and_single_clustering`, this function is
  slower and more memory intensive, but also more flexible. It differs in two
  respects:

  1. It doesn't assume that the user has already computed counts for all unique
  samples -- i.e., it allows duplicate samples.

  2. It allows unique clusterings for every sample.
  '''
    assert len(structs) == len(llhs) == len(clusterings)
    weights = util.softmax(llhs)
    vids = None

    for struct, clustering, weight in zip(structs, clusterings, weights):
        adjm = util.convert_parents_to_adjmatrix(struct)
        mrel = make_mutrel_from_cluster_adj(adjm, clustering)
        if vids is None:
            vids = mrel.vids
            soft_mutrel = np.zeros(mrel.rels.shape)
        else:
            assert mrel.vids == vids
        soft_mutrel += weight * mrel.rels

    soft_mutrel = fix_rounding_errors(soft_mutrel)
    return mutrel.Mutrel(
        vids=vids,
        rels=soft_mutrel,
    )
Example #15
File: models.py Project: zcmail/NHPoKD
    def hyper_parameter_learn(self, x, momentum=0.5):
        """
        Learn the hyper parameters of the model
        """
        opt_C_now = torch.zeros((self.n_weights, self.n_params),
                                dtype=torch.float64)
        log_w_arr = torch.zeros(self.n_weights, dtype=torch.float64)
        # Split the parameters into `alpha` and `beta`
        alpha = x[:self.n_params - 1]
        beta = x[self.n_params - 1:-2]
        alpha2 = x[-2:-1]
        beta2 = x[-1]
        # Sample noise
        sample_size = (self.n_weights, self.n_params)
        eps_arr = self.posterior.sample_epsilon(size=sample_size)
        for i in range(self.n_weights):
            eps = eps_arr[i]
            # Compute the importance weights (and their gradients)
            log_w_arr[i] = self._log_importance_weight(eps, alpha, beta,
                                                       alpha2, beta2)
            z_i, z_i2 = self.posterior.g(eps, alpha, beta, alpha2, beta2)
            z_i_temp, z_i_temp2 = self.prior.opt_hyper(z_i, z_i2)

            opt_C_now[i][:-1] = z_i_temp
            opt_C_now[i][-1] = z_i_temp2
            #print('len:',len(opt_C_now[i]))
            # Temper the weights
        log_w_arr /= self.weight_temp
        w_tilde = softmax(
            log_w_arr).detach()  # Detach `w_tilde` from backward computations
        # Compute the weighted average over all `n_weights` samples
        opt_C = torch.matmul(w_tilde.unsqueeze(0),
                             opt_C_now).squeeze().to(self.device)
        self.prior.C = (1 - momentum) * opt_C + momentum * self.prior.C
Example #16
    def get_root(self, state):
        main_board = state.board[:self.env.DIAGONAL].sum(axis=0)
        key = hash(main_board.tostring())
        if key in self.nodes.keys():
            root = self.nodes[key]
            # This makes this node root
            root.parent = None
        else:
            # Create a node
            # At the root node, evaluation and backup occurs at creation
            actions = state.meta.actions[state.board[-1, 0, 0]]
            prior_raw, V = self.evaluate(state.board)
            prior = np.zeros_like(prior_raw)
            noise = np.random.dirichlet(np.ones(len(actions)) * self.alpha)
            i0, i1, i2 = np.array(actions).T
            if self.data_format == 'channels_last':
                i2, i0, i1 = i0, i1, i2

            prior[np.zeros_like(i0), i0, i1, i2] = softmax(
                prior_raw[np.zeros_like(i0), i0, i1, i2]) + noise
            kwargs = {
                'state': state,
                'p': None,
                'prior': prior,
                'parent': None,
                'action_in': None,
                'actions': actions,
            }
            root = Node(**kwargs)
            self.nodes[key] = root
            self.backup(V, root)
        return root
Example #17
def make_mutrel_from_trees_and_single_clustering(structs, llhs, counts,
                                                 clustering):
    # Oftentimes, we will have many samples of the same adjacency matrix paired
    # with the same clustering. This will produce the same mutrel. As computing
    # the mutrel from adjm + clustering is expensive, we want to avoid repeating
    # this unnecessarily. Instead, we just modify the associated weight of
    # the pairing to reflect this.
    #
    # Observe that if we have `C` copies of the LLH `W`, we obtain
    # equivalent post-softmax linear-space weights under either of the following
    # two methods:
    #
    # 1. (naive) Represent the associated samples `C` separate times in the softmax
    # 2. (smart) Set `W' = W + log(C)`, as `exp(W') = Cexp(W)`
    weights = util.softmax(llhs + np.log(counts))
    vids = None

    for struct, weight in zip(structs, weights):
        adjm = util.convert_parents_to_adjmatrix(struct)
        crel = make_clustrel_from_cluster_adj(adjm)

        if vids is None:
            vids = crel.vids
            soft_clustrel = np.zeros(crel.rels.shape)
        else:
            assert crel.vids == vids
        soft_clustrel += weight * crel.rels

    soft_clustrel = fix_rounding_errors(soft_clustrel)
    clustrel = mutrel.Mutrel(rels=soft_clustrel, vids=vids)
    mrel = make_mutrel_from_clustrel(clustrel, clustering)
    return mrel
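
A small numeric check of the "smart" weighting trick described in the comments above, with a local softmax standing in for util.softmax (illustrative only): listing an LLH C times in the softmax gives its copies the same total weight as adding log(C) to it once.

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

llhs = np.array([-3.0, -1.0])
counts = np.array([4, 2])

naive = softmax(np.repeat(llhs, counts))          # one entry per duplicate sample
naive_totals = [naive[:4].sum(), naive[4:].sum()]
smart = softmax(llhs + np.log(counts))            # one entry per unique sample
assert np.allclose(naive_totals, smart)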
Example #18
    def __forward(self, X, weights, biases, activation_func):
        """
        This is the feed-forward section of the neural network, where we actually feed the network data
        and ask it to predict its class for us
        """
        # use dictionary to store different activation functions instead of many ifs
        activate = {
            'relu': util.relu,
            'tanh': util.tanh,
            'sigmoid': util.sigmoid
        }

        layers = len(weights.keys())
        Z = {}

        # Z at first hidden layer
        Z[0] = activate[activation_func](X.dot(weights[0]) + biases[0])

        # Z at other hidden layers
        for i in range(layers - 2):
            z_list = list(Z.keys())
            Z[i +
              1] = activate[activation_func](Z[z_list[-1]].dot(weights[i +
                                                                       1]) +
                                             biases[i + 1])

        # pY_given_x
        z_list = list(Z.keys())
        pY = util.softmax(Z[z_list[-1]].dot(weights[layers - 1]) +
                          biases[layers - 1])

        return Z, pY
Example #19
 def model(self, state, player):
     ''' Wrap the model to give the proper view and mask actions '''
     valid = self._game.valid(state, player)
     view = self._game.view(state, player)
     logits, value = self._model.model(view)
     probs = softmax(logits, valid)
     return probs, value
Example #20
def main():
    np.set_printoptions(linewidth=400,
                        precision=3,
                        threshold=sys.maxsize,
                        suppress=True)
    np.seterr(divide='raise', invalid='raise')
    V1, V2 = create_vars()

    estimators = (
        # lh.calc_lh_quad will be slower than usual on its first invocation due to
        # Numba JIT compilation. Don't be alarmed by seemingly poor runtime from it
        # as a result.
        lh.calc_lh_quad,
        lh.calc_lh_mc_1D,
        lh.calc_lh_mc_2D,
        lh.calc_lh_mc_2D_dumb,
        lh.calc_lh_grid,
    )
    max_estimator_len = max([len(M.__name__) for M in estimators])

    for M in estimators:
        M_name = M.__name__
        M = util.time_exec(M)
        evidence_per_sample = M(V1, V2)
        evidence_per_sample[:, common.Models.garbage] = lh.calc_garbage(V1, V2)
        evidence = np.sum(evidence_per_sample, axis=0)
        print(
            M_name.ljust(max_estimator_len),
            '%.3f ms' % util.time_exec._ms,
            evidence,
            util.softmax(evidence),
            sep='\t',
        )
Example #21
def calc_mutdist(cluster_phis, llhs, clusterings, baseline, counts):
    assert len(cluster_phis) == len(llhs) == len(clusterings) == len(counts)
    weights = util.softmax(llhs + np.log(counts))
    assert np.isclose(1, np.sum(weights))
    baseline_phis = baseline.stats

    vids = None
    # TODO: make assays meaningful, rather than just always setting it to None.
    assays = None
    dists = None

    for (cluster_phi, clustering, weight) in zip(cluster_phis, clusterings,
                                                 weights):
        cluster_phi = evalutil.fix_rounding_errors(cluster_phi)
        assert np.all(0 <= cluster_phi) and np.all(cluster_phi <= 1)
        V, membership = util.make_membership_mat(clustering)
        mphi = np.dot(membership, cluster_phi)

        if vids is None:
            vids = V
        assert V == vids
        if dists is None:
            dists = np.zeros(mphi.shape)

        weighted = weight * _calc_dist(mphi, baseline_phis)
        assert not np.any(np.isnan(weighted)) and not np.any(
            np.isinf(weighted))
        dists += weighted

    assert list(vids) == list(baseline.vids)
    if assays is not None:
        assert list(assays) == list(baseline.assays)
    return mutstat.Mutstat(vids=vids, assays=assays, stats=dists)
Example #22
    def sample(self, first=0, stop=-1, nchars=100):
        first = self.vocab[first].i
        stop = self.vocab[stop].i

        res = [first]
        dynet.renew_cg()
        state = self.rnn.initial_state()

        R = dynet.parameter(self.R)
        bias = dynet.parameter(self.bias)
        cw = first
        while True:
            #if cw.s in self.pron_dict.pdict:
            #    pron_vector = self.pron_dict.pdict[cw.s]
            #    pron_vector = dynet.inputVector(pron_vector)
            #else:
            spelling = [
                self.s2s.src_vocab[letter]
                for letter in self.vocab[cw].s.upper()
            ]
            embedded_spelling = self.s2s.embed_seq(spelling)
            pron_vector = self.s2s.encode_seq(embedded_spelling)[-1]

            x_t = pron_vector
            state = state.add_input(x_t)
            y_t = state.output()
            r_t = bias + (R * y_t)
            scores = r_t.vec_value()
            if self.vocab.unk is not None:
                ydist = util.softmax(
                    scores[:self.vocab.unk.i] +
                    scores[self.vocab.unk.i + 1:])  # remove UNK
                dist = ydist[:self.vocab.unk.i].tolist() + [
                    0
                ] + ydist[self.vocab.unk.i:].tolist()
            else:
                ydist = util.softmax(scores)
                dist = ydist
            rnd = random.random()
            for i, p in enumerate(dist):
                rnd -= p
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res
Example #23
File: layer.py Project: yujiali/nn
 def forward(self, Xtop):
     """Perform the forward pass, given the top layer output of the net, go
     through the output layer and compute the output."""
     self.Xtop = Xtop
     if self.act_type == util.OutputSpec.TYPE_LINEAR:
         self.Y = Xtop.dot(self.W)
     else:   # self.act_type == util.OutputSpec.TYPE_SOFTMAX
         self.Y = util.softmax(Xtop, self.W)
Example #24
 def selection_by_probs(self):
     """
     During training, a move is selected with
     probability softmax(N ** (1/t)).
     """
     N = [e.n**(1 / self.tree.t) for e in self.sub_edge]
     probs = util.softmax(N)
     return util.select_by_prob(self.sub_edge, probs)
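
A standalone numpy sketch of the selection rule in the docstring above (illustrative; util.select_by_prob and the edge objects aren't shown here): visit counts are sharpened by the temperature t and a move index is drawn from softmax(N ** (1/t)).

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

visit_counts = np.array([10.0, 5.0, 1.0])
t = 1.0
probs = softmax(visit_counts ** (1.0 / t))
move = np.random.choice(len(visit_counts), p=probs)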
Example #25
def gradients(W, x, y):
    """Gradient of cost function over all examples"""
    vec = np.dot(x, W);
    sigmoid_activation = softmax(vec)

    e = [compute_gradients(a, c, b) for a, c, b in izip(sigmoid_activation, y, x)]
    mean1 = np.sum(e, axis=0)
    return mean1
def main(architecture, folds, tta):
    test_dataset = InternValidDataset(transform=test_augm())
    labels = None
    for fold in folds:
        model = get_model(num_classes=test_dataset.num_classes,
                          architecture=architecture)
        state = torch.load('../results/{}/best-model_{}.pt'.format(
            architecture, fold))
        model.load_state_dict(state['model'])
        model.eval()
        labels = []
        with open('../results/{}/{}_valid_prob.csv'.format(architecture, fold),
                  "w") as f:
            for idx in tqdm.tqdm(range(len(test_dataset))):
                best_conf = 0
                best_pred = None
                for rot in range(4):
                    test_dataset.rot = rot
                    in1 = []
                    in2 = []
                    for _ in range(tta):
                        x = test_dataset[idx][0]
                        in1.append(x[0])
                        in2.append(x[1])
                    in1 = variable(torch.stack(in1))
                    in2 = variable(torch.stack(in2))
                    pred = model(in1, in2).data.cpu().numpy()
                    pred = np.array([softmax(x) for x in pred])
                    pred = np.sum(pred, axis=0) / len(pred)
                    if np.max(pred) > best_conf:
                        best_conf = np.max(pred)
                        best_pred = pred
                labels.append(test_dataset[idx][1])
                probas = ','.join([str(x) for x in best_pred])
                f.write('{}\n'.format(probas))

    dfs = [
        pd.read_csv('../results/{}/{}_valid_prob.csv'.format(architecture, i),
                    header=None) for i in folds
    ]
    classes = [
        'HTC-1-M7', 'LG-Nexus-5x', 'Motorola-Droid-Maxx', 'Motorola-Nexus-6',
        'Motorola-X', 'Samsung-Galaxy-Note3', 'Samsung-Galaxy-S4',
        'Sony-NEX-7', 'iPhone-4s', 'iPhone-6'
    ]
    for df in dfs:
        df.columns = classes
    df = dfs[0].copy()
    for i in np.arange(1, len(folds)):
        df[classes] += dfs[i][classes]
    df[classes] /= len(folds)
    matched = 0
    for i in np.arange(len(test_dataset)):
        pred = df[classes].iloc[i].values.argmax()
        real = labels[i]
        if pred == real:
            matched += 1
    print('accuracy = {}'.format(matched / len(test_dataset)))
Example #27
def gendata(n=FEATURES, m=EXAMPLES, c=CLASSES, seed=0):
    np.random.seed(seed)
    X = np.random.rand(m, n)
    X = np.hstack((X, np.ones((m,1)))) # Add column of 1's for bias
    factors = np.random.rand(n+1, c) * 20 - 10 
    factors = normalize_cols(factors)
    predict = np.dot(X, factors)
    Y = np.apply_along_axis(np.argmax, axis=1, arr=softmax(predict))
    return X, Y, factors
	def forward(self, input_layer, W, b):
		self.Z = np.dot(W, input_layer) + b
		self.A = util.softmax(self.Z)
		#print("OUTPUT:", self.Z.T[0])
		#print("OUTPUT:", self.A.T[0])
		#print("MIN:", np.min(self.A.T[0]))
		#print("MAX:", np.max(self.A.T[0]))
		#print("--------------")
		return self.A
Example #29
    def forward_pass(self, inputs):

        # declare variables used in the forward pass
        self.inputs = inputs
        self.n_inp = len(inputs)
        self.vr = []
        self.vz = []
        self.v_h = []
        self.vo = []
        self.r = []
        self.z = []
        self._h = []
        self.h = {}
        self.o = []
        self.h[-1] = np.zeros((self.h_size, 1))

        # performing recursion
        for i in range(self.n_inp):

            # calculating reset gate value
            # self.vr.append(np.dot(self.w['ur'],inputs[i]) + np.dot(self.w['wr'], self.h[i-1]) + self.b['r'])
            # self.r.append(sigmoid(self.vr[i]))
            self.r.append(
                sigmoid(
                    np.dot(self.w['ur'], inputs[i]) +
                    np.dot(self.w['wr'], self.h[i - 1]) + self.b['r']))

            # calculating update gate value
            # self.vz.append(np.dot(self.w['uz'],inputs[i]) + np.dot(self.w['wz'], self.h[i-1])  + self.b['z'])
            # self.z.append(sigmoid(self.vz[i]))
            self.z.append(
                sigmoid(
                    np.dot(self.w['uz'], inputs[i]) +
                    np.dot(self.w['wz'], self.h[i - 1]) + self.b['z']))

            # applying reset gate value
            # self.v_h.append(np.dot(self.w['u_h'], inputs[i]) + np.dot(self.w['w_h'], np.multiply(self.h[i - 1], self.r[i])) +  + self.b['_h'])
            # self._h.append(tanh(self.v_h[i]))
            self._h.append(
                tanh(
                    np.dot(self.w['u_h'], inputs[i]) +
                    np.dot(self.w['w_h'], np.multiply(self.h[i -
                                                             1], self.r[i])) +
                    +self.b['_h']))

            # applying update gate value
            self.h[i] = np.multiply(self.z[i], self.h[i - 1]) + np.multiply(
                1 - self.z[i], self._h[i])

            # calculating output
            # self.vo.append(np.dot(self.w['wo'], self.h[i]) + self.b['o'])
            # self.o.append(softmax(self.vo[i]))
            self.o.append(
                softmax(np.dot(self.w['wo'], self.h[i]) + self.b['o']))

        return self.o
Example #30
def _calc_llh(var_reads, ref_reads, omega, A, Z, psi):
    K = len(psi)
    assert Z.shape == (K, K)
    assert var_reads.shape == ref_reads.shape == omega.shape

    eta = util.softmax(psi)
    phi = np.dot(Z, eta)  # Kx1
    var_phis = np.dot(A, phi)
    logp = binom.logpmf(var_reads, ref_reads + var_reads, var_phis * omega)
    return np.sum(logp)
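
A tiny self-contained rerun of the likelihood above with made-up values (illustrative only; the real Z, read counts, and the A mapping from populations to variants come from the caller, and A is omitted here): population frequencies eta are a softmax over psi, phi follows from the structure matrix, and reads are scored with a binomial log-pmf.

import numpy as np
from scipy.stats import binom

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

psi = np.array([0.5, -0.2, 1.0])
Z = np.tril(np.ones((3, 3)))          # toy structure matrix: phi[k] accumulates eta[0..k]
eta = softmax(psi)
phi = np.dot(Z, eta)                  # each phi is in [0, 1] since eta sums to 1
var_reads = np.array([30, 20, 10])
total_reads = np.array([100, 100, 100])
omega = 0.5
llh = np.sum(binom.logpmf(var_reads, total_reads, phi * omega))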
Example #31
def sample(parameters, char_to_ix):
    """
    Sample a sequence of characters according to a sequence of probability
    distributions output of the RNN

    Arguments:
        parameters -- python dictionary containing the parameters Waa, Wax, Wya,
            by, and b.
        char_to_ix -- python dictionary mapping each character to an index.

    Returns:
        indices -- a list of length n containing the indices of the sampled
            characters.
    """
    # Retrieve parameters and relevant shapes from "parameters" dictionary
    Waa = parameters['Waa']
    Wax = parameters['Wax']
    Wya = parameters['Wya']
    by = parameters['by']
    b = parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))
    indices = []
    idx = -1

    # Loop over time-steps t. At each time-step, sample a character from a
    # probability distribution and append its index to "indices". We'll stop if
    # we reach 50 characters (which should be very unlikely with a well trained
    # model), which helps debugging and prevents entering an infinite loop.
    newline_character = char_to_ix['\n']

    while (idx != newline_character):
        # Forward propagate
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)

        # Sample the index of a character within the vocabulary from the
        # probability distribution y
        idx = np.random.choice(list(range(0, vocab_size)),
                               p=np.ndarray.flatten(y))

        # Append the index to "indices"
        indices.append(idx)

        # Step 4: Overwrite the input character as the one corresponding to the
        # sampled index.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        a_prev = a

    return indices
Example #32
    def expand_and_eval(self, leaf):
        action = random.choice(leaf.untried)
        leaf.untried.remove(action)

        time_s = time()
        state, reward, done, _ = self.env.step(leaf.state, action,
                                               leaf.actions)
        self.log_['exp/step']['time'] += time() - time_s
        self.log_['exp/step']['num'] += 1
        if done:
            actions = []
            prior = None
            V = reward
            reward = reward
        else:
            actions = state.meta.actions[state.board[-1, 0, 0]]
            time_e = time()
            prior_raw, V = self.evaluate(state.board)
            prior = np.zeros_like(prior_raw)
            noise = np.random.dirichlet(np.ones(len(actions)) * self.alpha)
            i0, i1, i2 = np.array(actions).T
            if self.data_format == 'channels_last':
                i0, i1, i2 = i1, i2, i0

            prior[np.zeros_like(i0), i0, i1, i2] = softmax(
                prior_raw[np.zeros_like(i0), i0, i1, i2]) + noise
            self.log_['exp/eval']['time'] += time() - time_e
            self.log_['exp/eval']['num'] += 1
            reward = None

        if self.data_format == 'channels_last':
            p = leaf.prior[(0, action[1], action[2], action[0])]
        else:
            p = leaf.prior[(0, *action)]
        kwargs = {
            'state': state,
            'p': p,
            'prior': prior,
            'parent': leaf,
            'action_in': action,
            'actions': actions,
            'reward': reward,
            'terminal': done,
        }
        child = Node(**kwargs)

        # Register node
        main_board = state.board[:self.env.DIAGONAL].sum(axis=0)
        key = hash(main_board.tostring())
        self.nodes[key] = child

        # Declare child of its parent
        leaf.children[action] = child
        return V, child
Example #33
    def forward(self, x):
        # Fetch the network parameters from the parameter dictionary
        w1, b1 = self.params['W1'], self.params['b1']
        w2, b2 = self.params['W2'], self.params['b2']

        # First-layer computation
        z1 = np.dot(x, w1) + b1
        h1 = sigmoid(z1)
        # Second-layer computation
        z2 = np.dot(h1, w2) + b2
        return softmax(z2)
Example #34
def gradients2(W, x, y, reg_lambda=1.0):
    """Gradient of cost function over all examples"""
    C = W.shape[1]
    M, _ = x.shape
    vec = np.dot(x, W);
    #print vec
    probas = softmax(vec) # M x C
    indicator_y = indicator_matrix(y, C) # M x C
    gradients = - np.dot((indicator_y - probas).T, x).T  # C * N
    gradients += reg_lambda * W 
    return gradients
Example #35
 def forward_propagation(self,x):
     # The total number of time steps
     T=len(x)
     # During forward propagation we save all hidden states in s because we need them later.
     s=np.zeros((T+1,self.hidden_dim))
     s[-1]=np.zeros(self.hidden_dim)
     # The outputs at each time step. Again, we save them for later.
     o=np.zeros((T,self.word_dim))
     # For each time step...
     for t in np.arange(T):
         # Note that we are indexing U by x[t]. This is the same as multiplying U with a one-hot vector.
         s[t]=np.tanh(self.U[:,x[t]]+self.W.dot(s[t-1]))
         o[t]=softmax(self.V.dot(s[t]))
     return [o,s]
Example #36
 def get_pred(self, img):
     '''img: face image, typically the cropped face based on bbx'''
     img = cv2.resize(img, (224, 224))
     self.img = copy.deepcopy(img)
     img = img.transpose(2, 0, 1)
     img = img[(2, 1, 0), :, :]  # TODO
     img *= 255
     img -= self.mean.reshape(3, 224, 224)
     img *= 0.01
     self.net.blobs['data'].reshape(1, 3, 224, 224)
     self.net.blobs['data'].data[:] = img
     out = self.net.forward()
     pred = np.reshape(out[self.layername], (68, 56 * 56))
     pred = softmax(pred)
     return np.reshape(pred, (1, 68, 56, 56))
Example #37
    def supAnalyser(self,X,freq,vocabulary,top=20):
        result_score=[]
        result_word=[]
        for i in range(self.cat):
            result_score.append([0.0]*top)
            result_word.append(['']*top)

        num_sent=np.size(X,0)
        allKids=[[]]*num_sent

        for i in range(num_sent):
            x=X[i]
            sl=len(x)
            words_embedded=self.WL[:,x]
            unsup_tree = self.forwardProp([],words_embedded,False,None,self.theta,freq)
            allKids[i]=unsup_tree.kids

            sup_tree=rnntree.rnntree(self.d,sl,words_embedded)

            nodeUnder = np.ones([2*sl-1,1])

            for j in range(sl,2*sl-1): # calculate n1, n2 and n1+n2 for each node in the sentence tree and store in nodeUnder
                kids = allKids[i][j]
                n1 = nodeUnder[kids[0]]
                n2 = nodeUnder[kids[1]]
                nodeUnder[j] = n1+n2

            #sentree.catDelta = np.zeros([cat_size, 2*sl-1])
            #sentree.catDelta_out = np.zeros([self.d,2*sl-1])

            for j in range(2*sl-1):
                kids = allKids[i][j]

                c1 = sup_tree.nodeFeatures[:,kids[0]]
                c2 = sup_tree.nodeFeatures[:,kids[1]]

                # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
                p = tanh(np.dot(self.W1,c1) + np.dot(self.W2,c2) + self.b1)

                # See last paragraph in Section 2.3
                p_norm1 = p/norm(p)

                # Eq. (7) in the paper (for special case of 1d label)
                #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
                sm=softmax(np.dot(self.Wlab,p_norm1) + self.blab)
                #max_score=max(sm)
                for ind in range(self.cat):
                    max_score=sm[ind]
                    #ind=list(sm).index(max_score)
                    min_score=min(result_score[ind])
                    if max_score>min_score:
                        min_ind=result_score[ind].index(min_score)
                        result_score[ind][min_ind]=max_score
                        if j<sl:
                            result_word[ind][min_ind]=vocabulary[x[j]]
                        else:
                            stk=[]
                            stk.extend(list(kids))
                            stk.reverse()
                            words=[]
                            while len(stk)!=0:
                                current=stk.pop()
                                if current<sl:
                                    words.append(vocabulary[x[current]])
                                else:
                                    toExtend=[]
                                    toExtend.extend(list(allKids[i][current]))
                                    toExtend.reverse()
                                    stk.extend(toExtend)

                            result_word[ind][min_ind]=' '.join(words)
        return (result_score,result_word)
Example #38
def predict(W, x):
    """function predicts the probability of input vector x
       the output y is an Mx1 vector (M is the number of classes)
    """
    values = softmax(np.dot(x, W))
    return np.argmax(values, axis=1)
 def forward(self, X):
     # Z = relu(X.dot(self.W1) + self.b1)
     Z = np.tanh(X.dot(self.W1) + self.b1)
     return softmax(Z.dot(self.W2) + self.b2), Z
Example #40
    def forwardProp(self,allKids,words_embedded,updateWlab,label,theta,freq):
        # allKids stores all nodes: row i holds node i, and the columns list the children contained by node i
        (W1,W2,W3,W4,Wlab,b1,b2,b3,blab,WL)=self.getParams(theta)
        # sl is presumably the size of the vocabulary
        sl=np.size(words_embedded,1)
        sentree=rnntree.rnntree(self.d,sl,words_embedded)
        collapsed_sentence = range(sl)
        # compute the sentiment (label) error
        if updateWlab:
            temp_label=np.zeros(self.cat)
            # label is the current class label; label-1 because lists are 0-indexed, i.e. the position of the current label is set to 1
            temp_label[label-1]=1.0
            nodeUnder = np.ones([2*sl-1,1])
            # n1, n2 are the numbers of nodes under the two children
            for i in range(sl,2*sl-1): # calculate n1, n2 and n1+n2 for each node in the sentence tree and store in nodeUnder
                kids = allKids[i]
                n1 = nodeUnder[kids[0]] # left child
                n2 = nodeUnder[kids[1]] # right child
                nodeUnder[i] = n1+n2    # number of nodes under node i

            cat_size=self.cat
            sentree.catDelta = np.zeros([cat_size, 2*sl-1])
            sentree.catDelta_out = np.zeros([self.d,2*sl-1])

            # classifier on single words
            for i in range(sl):
                sm = softmax(np.dot(Wlab,words_embedded[:,i]) + blab)
                # The sentiment error here differs slightly from the paper: it is taken directly as yi - h(x)
                lbl_sm = (1-self.alpha)*(temp_label - sm)
                # this appears to compute J
                sentree.nodeScores[i] = 1.0/2.0*(np.dot(lbl_sm,(temp_label- sm)))  # sentree.nodeScores has two parts: 0..sl is computed here, sl..2*sl-1 below
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))

            # sm = sigmoid(self.Wlab*words_embedded + self.blab)

            #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
            #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
            #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)

            for i in range(sl,2*sl-1):
                # what are kids, c1, c2?
                kids = allKids[i]

                c1 = sentree.nodeFeatures[:,kids[0]]   # word vector of the left child
                c2 = sentree.nodeFeatures[:,kids[1]]   # word vector of the right child

                # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + b1)

                # See last paragraph in Section 2.3
                p_norm1 = p/norm(p)

                # Eq. (7) in the paper (for special case of 1d label)
                #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
                sm=softmax(np.dot(Wlab,p_norm1) + blab)
                beta=0.5  # the paper itself does not have this beta value
                #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
                lbl_sm = beta * (1.0-self.alpha)*(temp_label - sm)
                #lbl_sm = beta * (1.0-self.alpha) * (temp_label-sm)
                #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
                #J=-(1.0-self.alpha)*np.log(sm[label-1])
                #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))
                #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
                J = 1.0/2.0*(np.dot(lbl_sm,(temp_label - sm)))

                sentree.nodeFeatures[:,i] = p_norm1
                sentree.nodeFeatures_unnormalized[:,i] = p
                sentree.nodeScores[i] = J
                sentree.numkids = nodeUnder

            sentree.kids = allKids
        # compute the reconstruction error
        else:
            # Reconstruction Error
            for j in range(sl-1):
                size2=np.size(words_embedded,1)
                c1 = words_embedded[:,0:-1] 
                c2 = words_embedded[:,1:]

                freq1 = freq[0:-1]
                freq2 = freq[1:]

                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + np.reshape(b1,[self.d,1])*([1]*(size2-1)))
                p_norm1 =p/np.sqrt(sum(p**2))
                # y1, y2 below are actually the paper's c1, c2, reconstructed from p
                y1_unnormalized = tanh(np.dot(W3,p_norm1) + np.reshape(b2,[self.d,1])*([1]*(size2-1)))
                y2_unnormalized = tanh(np.dot(W4,p_norm1) + np.reshape(b3,[self.d,1])*([1]*(size2-1)))

                y1 = y1_unnormalized/np.sqrt(sum(y1_unnormalized**2))
                y2 = y2_unnormalized/np.sqrt(sum(y2_unnormalized**2))

                y1c1 = self.alpha*(y1-c1)
                y2c2 = self.alpha*(y2-c2)

                # Eq. (4) in the paper: reconstruction error:重构误差
                # (y1-c1)*(y1-c1) evaluates to a single number
                J = 1.0/2.0*sum((y1c1)*(y1-c1) + (y2c2)*(y2-c2))
                
                # the rest of this for loop is not fully understood
                # finding the pair with smallest reconstruction error for constructing sentree
                # what does min(J) mean here, given that J is a value?
                J_min= min(J)
                J_minpos=np.argmin(J)
                # store the reconstructed vectors (c1', c2') with the smallest reconstruction error in the tree
                sentree.node_y1c1[:,sl+j] = y1c1[:,J_minpos]
                sentree.node_y2c2[:,sl+j] = y2c2[:,J_minpos]
                # presumably the update values
                sentree.nodeDelta_out1[:,sl+j] = np.dot(norm1tanh_prime(y1_unnormalized[:,J_minpos]) , y1c1[:,J_minpos])
                sentree.nodeDelta_out2[:,sl+j] = np.dot(norm1tanh_prime(y2_unnormalized[:,J_minpos]) , y2c2[:,J_minpos])

                words_embedded=np.delete(words_embedded,J_minpos+1,1)
                words_embedded[:,J_minpos]=p_norm1[:,J_minpos]
                sentree.nodeFeatures[:, sl+j] = p_norm1[:,J_minpos]
                sentree.nodeFeatures_unnormalized[:, sl+j]= p[:,J_minpos]
                sentree.nodeScores[sl+j] = J_min
                sentree.pp[collapsed_sentence[J_minpos]] = sl+j
                sentree.pp[collapsed_sentence[J_minpos+1]] = sl+j
                sentree.kids[sl+j,:] = [collapsed_sentence[J_minpos], collapsed_sentence[J_minpos+1]]
                sentree.numkids[sl+j] = sentree.numkids[sentree.kids[sl+j,0]] + sentree.numkids[sentree.kids[sl+j,1]]


                freq=np.delete(freq,J_minpos+1)
                freq[J_minpos] = (sentree.numkids[sentree.kids[sl+j,0]]*freq1[J_minpos] + sentree.numkids[sentree.kids[sl+j,1]]*freq2[J_minpos])/(sentree.numkids[sentree.kids[sl+j,0]]+sentree.numkids[sentree.kids[sl+j,1]])

                collapsed_sentence=np.delete(collapsed_sentence,J_minpos+1)
                collapsed_sentence[J_minpos]=sl+j
        return sentree
Example #41
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs) :
    """
    X              ... data. should be binary, or in [0,1] interpreted as
                   ... probabilities
    numhid         ... number of hidden units
    y              ... List of discrete labels

    nClass          number of classes
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer

    model.weightlabel       ... The weights on labels layer
    model.biasLabel       ... The biases on labels layer

    errors          The errors in reconstruction at each epoch
       """

    arg = util.processOptions(kwargs, \
                            nClass = np.unique(y).size, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["nClass"],\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    if verbose :
        print "Processing data ..."

    # from which step, we start to compute the average
#    avgStart = maxEpoch - avgLast

    # for weight decay use
#    oldPenalty = penalty

    # numCases : number of example
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1*np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1,nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1,nClass)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch) :
        error = []

        for batch in range(numBatch) :
            # train each data batch
            if batchSize*(batch+1) > numCases :
                visTrue = deviceData.get_row_slice(batchSize*batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize*batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else :
                visTrue = deviceData.get_row_slice(batchSize*batch, batchSize*(batch+1))
                labelTrue = deviceLabel.get_row_slice(batchSize*batch, batchSize*(batch+1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabel.mult(momentum)
            biasLabel.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if epoch == 1 and batch == 1:
                    pass  # not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target = visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target = labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and biases
            weight.add_mult(weightInc, eta/curSize)
            biasV.add_mult(biasVInc, eta/curSize)
            biasH.add_mult(biasHInc, eta/curSize)
            weightLabel.add_mult(weightLabelInc, eta/curSize)
            biasLabel.add_mult(biasLabelInc, eta/curSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print("Epoch %d/%d, reconstruction error is %f" % (epoch+1, maxEpoch, sum(error)))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name,model)

    return model_
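
The cudamat code above performs one CD-1 sweep for an RBM that models the data jointly with a softmax label group. For reference, here is a minimal NumPy-only sketch of the same update (single mini-batch, no momentum or weight decay; the helper names and 1-D bias shapes are assumptions, not the original API):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softmax_rows(z):
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def cd1_step(v, lab, weight, weightLabel, biasV, biasH, biasLabel, eta):
    """One CD-1 update for a class-conditional RBM (illustrative sketch).
    v: (batch, numVis) visible data, lab: (batch, nClass) one-hot labels."""
    # positive phase: hidden activations given data and labels
    h_pos = sigmoid(v @ weight + lab @ weightLabel + biasH)
    h_state = (np.random.rand(*h_pos.shape) < h_pos).astype(v.dtype)
    # negative phase: reconstruct visibles and labels from the sampled hiddens
    v_neg = sigmoid(h_state @ weight.T + biasV)
    lab_neg = softmax_rows(h_state @ weightLabel.T + biasLabel)
    h_neg = sigmoid(v_neg @ weight + lab_neg @ weightLabel + biasH)
    # parameter updates: positive statistics minus negative statistics
    batch = v.shape[0]
    weight += eta / batch * (v.T @ h_pos - v_neg.T @ h_neg)
    weightLabel += eta / batch * (lab.T @ h_pos - lab_neg.T @ h_neg)
    biasV += eta / batch * (v - v_neg).sum(axis=0)
    biasH += eta / batch * (h_pos - h_neg).sum(axis=0)
    biasLabel += eta / batch * (lab - lab_neg).sum(axis=0)
    return ((v - v_neg) ** 2).sum()  # reconstruction error, as monitored above
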
Example #42
0
  outRoot = sys.argv[3]
  nets = []
  for i in range(len(protos)):
    nets.append(caffe.Net(protos[i], models[i], caffe.TEST))
  filenames = get_filenames(filelists)
  filenames = filenames[555:]
  random.shuffle(filenames)

  # save
  f = 'ibug/image_051_1.jpg'
  img = caffe.io.load_image(root + f)
  for i in range(len(nets)):
    pred = get_preds_single(nets[i], layernames[i], img)
    response_map = pred[0, 0]
    shape = response_map.shape
    response_map = softmax(response_map.reshape((1, shape[0]*shape[1])))
    response_map = np.reshape(response_map, shape)
    plt.imsave(outRoot + str(i) + "_" + f[5:], response_map, cmap='gray', vmin=response_map.min(), vmax=response_map.max())
  exit(0)
  for f in filenames:
    print(f)
    img = caffe.io.load_image(root + f)
    for i in range(len(nets)):
      pred = get_preds_single(nets[i], layernames[i], img)
      response_map = pred[0, (0, 36, 30, 57)]
      shape = response_map.shape
      response_map = softmax(response_map.reshape((shape[0], shape[1]*shape[2])))
      response_map = response_map.reshape(shape)
      for j in range(shape[0]):
        plt.subplot(4, 4, i*4 + j + 1)
        plt.imshow(response_map[j], cmap='gray', vmin=0, vmax=0.2)
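
In the loops above each response map is flattened, passed through softmax, and reshaped back so it can be visualized as a spatial probability map. A standalone helper with the same effect (plain NumPy with a stability shift; a sketch, not the softmax imported by this script) would be:

import numpy as np

def spatial_softmax(response_map):
    """Turn a (H, W) response map into a probability map over all pixels."""
    flat = response_map.reshape(-1).astype(np.float64)
    flat -= flat.max()                 # shift for numerical stability
    probs = np.exp(flat)
    probs /= probs.sum()
    return probs.reshape(response_map.shape)
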
Example #43
0
if __name__ == '__main__':
    # usage: python shift_exp.py prototxt model layername root 
    # filelists outRoot
    prototxt = sys.argv[1]
    model = sys.argv[2]
    layername = sys.argv[3]
    root = sys.argv[4]
    filelists = sys.argv[5]
    outRoot = sys.argv[6]

    net = caffe.Net(prototxt, model, caffe.TEST)
    (filenames, bbxs) = get_filenames_bbx(filelists)
    index = list(range(len(filenames)))
    random.shuffle(index)
    for i in index:
        print(i, filenames[i], bbxs[i][0], bbxs[i][1], bbxs[i][2], bbxs[i][3])
        img_crops = shift_exp(root, filenames[i], bbxs[i], outRoot)
        preds = get_preds_multiple(net, layername, img_crops)
        preds_shape = preds.shape
        preds = softmax(np.reshape(preds, (preds_shape[0]*preds_shape[1], \
            preds_shape[2]*preds_shape[3])))
        preds = np.reshape(preds, preds_shape)
        (hp, wp) = get_index(preds)
        hp = hp * 4
        wp = wp * 4
        for j in range(9):
            plt.subplot(3, 3, j+1)
            plt.imshow(img_crops[j])
            plt.plot(wp[j], hp[j], '.g', hold=True)
        plt.show()
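
get_index is defined elsewhere in this project; judging from its use above, it returns the (row, column) peak of each softmaxed heatmap, which the script then scales by the network stride of 4. A hypothetical stand-in (name and shapes are assumptions) is:

import numpy as np

def get_index_sketch(preds):
    """For heatmaps of shape (num_crops, num_landmarks, H, W),
    return the (row, col) location of each per-map maximum."""
    n, c, h, w = preds.shape
    flat_idx = preds.reshape(n, c, h * w).argmax(axis=2)
    hp = flat_idx // w    # row index of the maximum response
    wp = flat_idx % w     # column index of the maximum response
    return hp, wp
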
Example #44
0
    def forwardProp(self,allKids,words_embedded,updateWlab,label,theta,freq):
        (W1,W2,W3,W4,Wlab,b1,b2,b3,blab,WL)=self.getParams(theta)
        sl=np.size(words_embedded,1)
        sentree=rnntree.rnntree(self.d,sl,words_embedded)
        collapsed_sentence = range(sl)
        if updateWlab:
            temp_label=np.zeros(self.cat)
            temp_label[label-1]=1.0
            nodeUnder = np.ones([2*sl-1,1])

            for i in range(sl,2*sl-1): # count the leaves under each node of the sentence tree (n1, n2, n1+n2) and store in nodeUnder
                kids = allKids[i]
                n1 = nodeUnder[kids[0]]
                n2 = nodeUnder[kids[1]]
                nodeUnder[i] = n1+n2

            cat_size=self.cat
            sentree.catDelta = np.zeros([cat_size, 2*sl-1])
            sentree.catDelta_out = np.zeros([self.d,2*sl-1])

            # classifier on single words
            for i in range(sl):
                sm = softmax(np.dot(Wlab,words_embedded[:,i]) + blab)
                lbl_sm = (1-self.alpha)*(temp_label - sm)
                sentree.nodeScores[i] = 1.0/2.0*(np.dot(lbl_sm,(temp_label- sm)))
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))

            # sm = sigmoid(self.Wlab*words_embedded + self.blab)

            #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
            #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
            #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)

            for i in range(sl,2*sl-1):
                kids = allKids[i]

                c1 = sentree.nodeFeatures[:,kids[0]]
                c2 = sentree.nodeFeatures[:,kids[1]]

                # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + b1)

                # See last paragraph in Section 2.3
                p_norm1 = p/norm(p)

                # Eq. (7) in the paper (for special case of 1d label)
                #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
                sm=softmax(np.dot(Wlab,p_norm1) + blab)
                beta=0.5
                #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
                lbl_sm = beta * (1.0-self.alpha)*(temp_label - sm)
                #lbl_sm = beta * (1.0-self.alpha) * (temp_label-sm)
                #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
                #J=-(1.0-self.alpha)*np.log(sm[label-1])
                #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))
                #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
                J = 1.0/2.0*(np.dot(lbl_sm,(temp_label - sm)))

                sentree.nodeFeatures[:,i] = p_norm1
                sentree.nodeFeatures_unnormalized[:,i] = p
                sentree.nodeScores[i] = J
                sentree.numkids = nodeUnder

            sentree.kids = allKids
        else:
            # Reconstruction Error
            for j in range(sl-1):
                size2=np.size(words_embedded,1)
                c1 = words_embedded[:,0:-1]
                c2 = words_embedded[:,1:]

                freq1 = freq[0:-1]
                freq2 = freq[1:]

                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + np.reshape(b1,[self.d,1])*([1]*(size2-1)))
                p_norm1 =p/np.sqrt(sum(p**2))

                y1_unnormalized = tanh(np.dot(W3,p_norm1) + np.reshape(b2,[self.d,1])*([1]*(size2-1)))
                y2_unnormalized = tanh(np.dot(W4,p_norm1) + np.reshape(b3,[self.d,1])*([1]*(size2-1)))

                y1 = y1_unnormalized/np.sqrt(sum(y1_unnormalized**2))
                y2 = y2_unnormalized/np.sqrt(sum(y2_unnormalized**2))

                y1c1 = self.alpha*(y1-c1)
                y2c2 = self.alpha*(y2-c2)

                # Eq. (4) in the paper: reconstruction error
                J = 1.0/2.0*sum((y1c1)*(y1-c1) + (y2c2)*(y2-c2))

                # finding the pair with smallest reconstruction error for constructing sentree
                J_min= min(J)
                J_minpos=np.argmin(J)

                sentree.node_y1c1[:,sl+j] = y1c1[:,J_minpos]
                sentree.node_y2c2[:,sl+j] = y2c2[:,J_minpos]
                sentree.nodeDelta_out1[:,sl+j] = np.dot(norm1tanh_prime(y1_unnormalized[:,J_minpos]) , y1c1[:,J_minpos])
                sentree.nodeDelta_out2[:,sl+j] = np.dot(norm1tanh_prime(y2_unnormalized[:,J_minpos]) , y2c2[:,J_minpos])

                words_embedded=np.delete(words_embedded,J_minpos+1,1)
                words_embedded[:,J_minpos]=p_norm1[:,J_minpos]
                sentree.nodeFeatures[:, sl+j] = p_norm1[:,J_minpos]
                sentree.nodeFeatures_unnormalized[:, sl+j]= p[:,J_minpos]
                sentree.nodeScores[sl+j] = J_min
                sentree.pp[collapsed_sentence[J_minpos]] = sl+j
                sentree.pp[collapsed_sentence[J_minpos+1]] = sl+j
                sentree.kids[sl+j,:] = [collapsed_sentence[J_minpos], collapsed_sentence[J_minpos+1]]
                sentree.numkids[sl+j] = sentree.numkids[sentree.kids[sl+j,0]] + sentree.numkids[sentree.kids[sl+j,1]]


                freq=np.delete(freq,J_minpos+1)
                freq[J_minpos] = (sentree.numkids[sentree.kids[sl+j,0]]*freq1[J_minpos] + sentree.numkids[sentree.kids[sl+j,1]]*freq2[J_minpos])/(sentree.numkids[sentree.kids[sl+j,0]]+sentree.numkids[sentree.kids[sl+j,1]])

                collapsed_sentence=np.delete(collapsed_sentence,J_minpos+1)
                collapsed_sentence[J_minpos]=sl+j
        return sentree
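
The unsupervised branch above builds the tree greedily: every adjacent pair of columns gets a candidate parent, both children are reconstructed from it, and the pair with the smallest Eq. (4) error is merged. A condensed NumPy sketch of one merge step (shapes and the alpha weighting follow the code above, but this is an illustration rather than the original method; b1, b2, b3 are assumed to be 1-D bias vectors of length d):

import numpy as np

def greedy_merge_step(words_embedded, W1, W2, W3, W4, b1, b2, b3, alpha=0.2):
    """One greedy merge of the recursive autoencoder.
    words_embedded: (d, n) column word vectors; returns the reduced matrix,
    the index of the merged pair and its reconstruction error."""
    c1, c2 = words_embedded[:, :-1], words_embedded[:, 1:]
    # candidate parent for every adjacent pair, normalized to unit length
    p = np.tanh(W1 @ c1 + W2 @ c2 + b1[:, None])
    p = p / np.sqrt((p ** 2).sum(axis=0))
    # reconstruct both children from each candidate parent
    y1 = np.tanh(W3 @ p + b2[:, None]); y1 = y1 / np.sqrt((y1 ** 2).sum(axis=0))
    y2 = np.tanh(W4 @ p + b3[:, None]); y2 = y2 / np.sqrt((y2 ** 2).sum(axis=0))
    # Eq. (4): weighted reconstruction error for every candidate pair
    J = 0.5 * (alpha * (y1 - c1) ** 2 + alpha * (y2 - c2) ** 2).sum(axis=0)
    j = int(np.argmin(J))
    # replace the winning pair by its parent vector
    merged = np.delete(words_embedded, j + 1, axis=1)
    merged[:, j] = p[:, j]
    return merged, j, float(J[j])
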
Example #45
0
File: SoftmaxLayer.py Project: dxmtb/nn
 def activate(self, input):
     a = np.dot(input, self.W) + self.b
     ret = softmax(a)
     return ret
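
The softmax this layer relies on is presumably the row-wise, numerically stabilized version; a minimal sketch for input of shape (batch, classes) is:

import numpy as np

def softmax(a):
    """Row-wise softmax over a (batch, classes) array, shifted for stability."""
    a = a - a.max(axis=1, keepdims=True)
    e = np.exp(a)
    return e / e.sum(axis=1, keepdims=True)
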
Example #46
0
    def forwardProp(self,allKids,words_embedded,updateWlab,label,theta,freq):
        (W1,W2,W3,W4,Wlab,b1,b2,b3,blab,WL)=self.getParams(theta)
        # sl is the number of columns of words_embedded, i.e. the number of words in the sentence
        # allKids is empty at first: before training the parse tree has not been built yet, and the tree
        # structure only appears once training is done. Its contents change as the algorithm proceeds.
        sl=np.size(words_embedded,1)
        sentree=rnntree.rnntree(self.d,sl,words_embedded)
        collapsed_sentence = range(sl)

        # updateWlab mainly computes the sentiment error and updates the sentiment weights;
        # the sentiment error also takes p as input, so p has to be computed here as well
        if updateWlab:
            temp_label=np.zeros(self.cat)
            # if cat = 4, temp_label starts as (0, 0, 0, 0); the next line sets the entry for label to 1
            temp_label[label-1]=1.0
            nodeUnder = np.ones([2*sl-1,1])

            # this loop counts how many leaf nodes sit under each internal node
            # kids stores two values, the left and the right child;
            # presumably allKids[i] describes the i-th non-leaf node, with allKids[i][0] its left child and allKids[i][1] its right child
            for i in range(sl,2*sl-1): # count the leaves under each node of the sentence tree (n1, n2, n1+n2) and store in nodeUnder
                kids = allKids[i]
                n1 = nodeUnder[kids[0]]
                n2 = nodeUnder[kids[1]]
                nodeUnder[i] = n1+n2

            cat_size=self.cat
            sentree.catDelta = np.zeros([cat_size, 2*sl-1])
            sentree.catDelta_out = np.zeros([self.d,2*sl-1])

            # classifier on single words
            # process all single words, i.e. the leaf nodes
            # open question: why is the sentiment error also computed for the leaf nodes?
            for i in range(sl):
                sm = softmax(np.dot(Wlab,words_embedded[:,i]) + blab)
                # however the sentiment error is computed, sentree.nodeScores stores that error;
                # what sentree.catDelta stores is less clear, but it is related to the sentiment error
                lbl_sm = (1-self.alpha)*(temp_label - sm)
                sentree.nodeScores[i] = 1.0/2.0*(np.dot(lbl_sm,(temp_label- sm)))
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))

            # sm = sigmoid(self.Wlab*words_embedded + self.blab)

            #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
            #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
            #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)

            # indices beyond sl correspond to the parents of the word nodes
            for i in range(sl,2*sl-1):
                kids = allKids[i]
                # c1 and c2 are the vectors of the left and right children
                c1 = sentree.nodeFeatures[:,kids[0]]
                c2 = sentree.nodeFeatures[:,kids[1]]

                # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
                # compute p, which serves as the score used to decide which two nodes are merged
                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + b1)

                # See last paragraph in Section 2.3
                p_norm1 = p/norm(p)

                # Eq. (7) in the paper (for special case of 1d label)
                #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
                # compute the node's sentiment label, sm
                sm = softmax(np.dot(Wlab,p_norm1) + blab)
                beta=0.5
                #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
                lbl_sm = beta * (1.0-self.alpha)*(temp_label - sm)
                #lbl_sm = beta * (1.0-self.alpha) * (temp_label-sm)
                #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
                #J=-(1.0-self.alpha)*np.log(sm[label-1])
                #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
                sentree.catDelta[:, i] = -np.dot(lbl_sm,softmax_prime(sm))
                #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
                J = 1.0/2.0*(np.dot(lbl_sm,(temp_label - sm)))

                sentree.nodeFeatures[:,i] = p_norm1
                sentree.nodeFeatures_unnormalized[:,i] = p
                sentree.nodeScores[i] = J
                sentree.numkids = nodeUnder

            sentree.kids = allKids
        else:
            # this branch mainly computes the reconstruction error
            # Reconstruction Error
            for j in range(sl-1):
                size2=np.size(words_embedded,1)

                """
                 经过测试,p有多个值
                 也就不难怪这里c1,c2里面分别存了多个单词的向量
                 因此,这个算法并不是一个个依次算p的,而是一次性一起算出来p
                 也因此J的值应该也是有多个值。代表两两单词计算的不同结果。
                """
                c1 = words_embedded[:,0:-1] # every word except the last
                c2 = words_embedded[:,1:]  # every word except the first

                freq1 = freq[0:-1]
                freq2 = freq[1:]

                p = tanh(np.dot(W1,c1) + np.dot(W2,c2) + np.reshape(b1,[self.d,1])*([1]*(size2-1)))
                p_norm1 =p/np.sqrt(sum(p**2))

                y1_unnormalized = tanh(np.dot(W3,p_norm1) + np.reshape(b2,[self.d,1])*([1]*(size2-1)))
                y2_unnormalized = tanh(np.dot(W4,p_norm1) + np.reshape(b3,[self.d,1])*([1]*(size2-1)))

                y1 = y1_unnormalized/np.sqrt(sum(y1_unnormalized**2))
                y2 = y2_unnormalized/np.sqrt(sum(y2_unnormalized**2))

                y1c1 = self.alpha*(y1-c1)
                y2c2 = self.alpha*(y2-c2)

                # Eq. (4) in the paper: reconstruction error
                J = 1.0/2.0*sum((y1c1)*(y1-c1) + (y2c2)*(y2-c2))

                # finding the pair with smallest reconstruction error for constructing sentree
                J_min= min(J)
                J_minpos=np.argmin(J)

                """
                只有非叶子节点才会有重构节点,因此,sentree.node_y1c1需要从sl+j开始存y1c1.
                """
                sentree.node_y1c1[:,sl+j] = y1c1[:,J_minpos]
                sentree.node_y2c2[:,sl+j] = y2c2[:,J_minpos]
                sentree.nodeDelta_out1[:,sl+j] = np.dot(norm1tanh_prime(y1_unnormalized[:,J_minpos]) , y1c1[:,J_minpos])
                sentree.nodeDelta_out2[:,sl+j] = np.dot(norm1tanh_prime(y2_unnormalized[:,J_minpos]) , y2c2[:,J_minpos])

                # once a pair has been chosen, remove its vectors from words_embedded
                # and put the merged node's vector in their place
                words_embedded=np.delete(words_embedded,J_minpos+1,1)
                words_embedded[:,J_minpos]=p_norm1[:,J_minpos]
                sentree.nodeFeatures[:, sl+j] = p_norm1[:,J_minpos]
                sentree.nodeFeatures_unnormalized[:, sl+j]= p[:,J_minpos]
                sentree.nodeScores[sl+j] = J_min
                # pp presumably stores parent indices, since both children share the same parent
                sentree.pp[collapsed_sentence[J_minpos]] = sl+j
                sentree.pp[collapsed_sentence[J_minpos+1]] = sl+j
                sentree.kids[sl+j,:] = [collapsed_sentence[J_minpos], collapsed_sentence[J_minpos+1]]
                sentree.numkids[sl+j] = sentree.numkids[sentree.kids[sl+j,0]] + sentree.numkids[sentree.kids[sl+j,1]]


                freq=np.delete(freq,J_minpos+1)
                freq[J_minpos] = (sentree.numkids[sentree.kids[sl+j,0]]*freq1[J_minpos] + sentree.numkids[sentree.kids[sl+j,1]]*freq2[J_minpos])/(sentree.numkids[sentree.kids[sl+j,0]]+sentree.numkids[sentree.kids[sl+j,1]])

                collapsed_sentence=np.delete(collapsed_sentence,J_minpos+1)
                collapsed_sentence[J_minpos]=sl+j
            print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
            print(sentree.pp)
            print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
            print(sentree.kids)
        return sentree
 def forward(self, X):
     return softmax(X.dot(self.W) + self.b)
Example #48
0
 def forward(self, x, t):
     self.t = t
     self.y = softmax(x)
     self.loss = cross_entropy_error(self.y, self.t)
     return self.loss
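
This last fragment is the usual softmax-with-loss layer. A self-contained sketch including the backward pass (gradient (y - t) / batch_size for one-hot targets t; the class name and helpers below are illustrative, not the original module):

import numpy as np

def softmax(x):
    x = x - x.max(axis=1, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=1, keepdims=True)

def cross_entropy_error(y, t):
    # t is assumed one-hot; clip to avoid log(0)
    return -np.sum(t * np.log(np.clip(y, 1e-12, None))) / y.shape[0]

class SoftmaxWithLoss:
    def __init__(self):
        self.y = None
        self.t = None

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        return cross_entropy_error(self.y, self.t)

    def backward(self, dout=1.0):
        # gradient of the averaged cross-entropy with respect to the logits x
        return dout * (self.y - self.t) / self.t.shape[0]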