def mmr_geometricmedian_ker(K):
  ## Weiszfeld iteration in kernel space: returns (Ka,aKa), the kernel products
  ## k(x_i,median) and k(median,median) of the geometric median of the points
  ## represented by the kernel matrix K.
  m=K.shape[0]
  Ka=mean(K,axis=1)
  aKa=np_sum(Ka)/m

  niter=1000
  xeps=sqrt(np_sum(Ka**2))/100
  xerr=2*xeps

  e1=ones(m)

  for iiter in range(niter):
    ## d2u=sqrt((zeros(m)+aKa)+diag(K)-2*Ka)
    d2u_2=aKa+diag(K)-2*Ka
    ineg=where(d2u_2<0)[0]
    d2u_2[ineg]=0.0
    d2u=sqrt(d2u_2)

    inul=where(d2u<xeps)[0]
    d2u[inul]=xeps
    xdenom=np_sum(e1/d2u)
    Kanext=np_sum(K/outer(d2u,e1),axis=0)/xdenom 
    aKanext=np_sum(Ka/d2u)/xdenom
    if np_max(Kanext-Ka)<xerr:
      Ka=copy(Kanext)
      aKa=aKanext
      break
    Ka=copy(Kanext)
    aKa=aKanext
    
  return(Ka,aKa)
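A minimal usage sketch (not part of the original source): with a linear kernel K = X·Xᵀ and the numpy-style imports assumed by the snippet above (mean, sqrt, diag, ones, where, outer, copy, sum as np_sum, max as np_max), the returned pair gives the kernel products of every point with the estimated geometric median.

import numpy as np

X = np.random.randn(50, 3)            # 50 hypothetical points in R^3
K = X @ X.T                           # linear kernel matrix
Ka, aKa = mmr_geometricmedian_ker(K)  # Ka[i] ~ k(x_i, median), aKa ~ k(median, median)
d2 = np.diag(K) - 2 * Ka + aKa        # squared feature-space distance of each point to the median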
Example #2
def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
    result = 0
    for sentence in sentences:
        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]

            word2_subwords = []
            vocab_subwords_indices = []
            ngrams_subwords_indices = []

            for index in word2_indices:
                vocab_subwords_indices += [index]
                word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]]

            for subword in word2_subwords:
                ngrams_subwords_indices.append(model.wv.ngrams[subword])

            l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0)  # 1 x vector_size
            l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0)  # 1 x vector_size

            l1 = np_sum([l1_vocab, l1_ngrams], axis=0)
            subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices]
            if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean:
                l1 /= (len(subwords_indices[0]) + len(subwords_indices[1]))

            train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True)  # train on the sliding window for target word
        result += len(word_vocabs)
    return result
Example #3
    def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True):
        """
        Update distributed memory model by training on a single sentence.

        The sentence is a list of Vocab objects (or None, where the corresponding
        word is not in the vocabulary). Called internally from `Doc2Vec.train()`.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        lbl_indices = [lbl.index for lbl in lbls if lbl is not None]
        lbl_sum = np_sum(model.syn0[lbl_indices], axis=0)
        lbl_len = len(lbl_indices)
        neg_labels = []
        if model.negative:
            # precompute negative labels
            neg_labels = zeros(model.negative + 1)
            neg_labels[0] = 1.

        for pos, word in enumerate(sentence):
            if word is None:
                continue  # OOV word in the input sentence => skip
            reduced_window = random.randint(model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum  # 1 x layer1_size
            if word2_indices and model.cbow_mean:
                l1 /= (len(word2_indices) + lbl_len)
            neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, neg_labels, train_words, train_words)
            if train_lbls:
                model.syn0[lbl_indices] += neu1e

        return len([word for word in sentence if word is not None])
Example #4
def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True):
    """
    Update distributed memory model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Doc2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from doc2vec_inner instead.

    """
    
    lbl_indices = [lbl.index for lbl in lbls if lbl is not None]

    if len(lbl_indices) <= model.K:
        return 0

    docIndxPos = int(model.index2word[lbl_indices[0]][5:])
    topKTopics = argsort(model.w_ld[docIndxPos])[::-1][:4]

    
    selected_lbl_indices = [lbl_indices[0]]
    for i in range(2):
        selected_lbl_indices.append(lbl_indices[topKTopics[i]+1])

    
    lbl_sum = np_sum(model.syn0[lbl_indices[0]], axis=0)
##    lbl_len = len(lbl_indices)
    lbl_len = 1
    neg_labels = []
    if model.negative:
        # precompute negative labels
        neg_labels = zeros(model.negative + 1)
        neg_labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window)  # `b` in the original doc2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        
        l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum  # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= (len(word2_indices) + lbl_len)
        neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, neg_labels, train_words, train_words)
        if train_lbls:
            model.syn0[selected_lbl_indices[0]] += neu1e
            model.syn0[selected_lbl_indices[1:]] += (neu1e/model.noOfLabels)
            
        word2_indices.append(word.index)
        a_1 = np_sum(model.syn0[word2_indices], axis=0)/len(word2_indices)

        
        docIndxNeg = selectNegativeDocs(docIndxPos)
        
        myTrain(model, docIndxPos, docIndxNeg, a_1)

    return len([word for word in sentence if word is not None])
Example #5
    def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None,
                          learn_doctags=True, learn_words=True, learn_hidden=True,
                          word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
        """
        Update distributed memory model ("PV-DM") by training on a single document.

        Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This
        method implements the DM model with a projection (input) layer that is
        either the sum or mean of the context vectors, depending on the model's
        `dm_mean` configuration field.  See `train_dm_concat()` for the DM model
        with a concatenated input layer.

        The document is provided as `doc_words`, a list of word tokens which are looked up
        in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
        into the doctag_vectors array.

        Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
        prevent learning-updates to those respective model weights, as if using the
        (partially-)frozen model to infer other compatible vectors.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        if word_vectors is None:
            word_vectors = model.syn0
        if word_locks is None:
            word_locks = model.syn0_lockf
        if doctag_vectors is None:
            doctag_vectors = model.docvecs.doctag_syn0
        if doctag_locks is None:
            doctag_locks = model.docvecs.doctag_syn0_lockf

        word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and
                       model.vocab[w].sample_int > model.random.randint(2**32)]
        doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0)
        doctag_len = len(doctag_indexes)

        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indexes = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            l1 = np_sum(word_vectors[word2_indexes], axis=0) + doctag_sum  # 1 x layer1_size
            if word2_indexes and model.cbow_mean:
                l1 /= (len(word2_indexes) + doctag_len)
            neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha,
                                    learn_vectors=False, learn_hidden=learn_hidden)
            if word2_indexes and not model.cbow_mean:
                neu1e /= (len(word2_indexes) + doctag_len)
            if learn_doctags:
                doctag_vectors[doctag_indexes] += neu1e * \
                    np_repeat(doctag_locks[doctag_indexes], model.vector_size).reshape(-1, model.vector_size)
            if learn_words:
                word_vectors[word2_indexes] += neu1e * \
                    np_repeat(word_locks[word2_indexes], model.vector_size).reshape(-1, model.vector_size)

        return len(word_vocabs)
Example #6
    def compute(self, today, assets, out, data, decay_rate):
        weights = self.weights(len(data), decay_rate)

        mean = average(data, axis=0, weights=weights)
        variance = average((data - mean) ** 2, axis=0, weights=weights)

        squared_weight_sum = np_sum(weights) ** 2
        bias_correction = squared_weight_sum / (squared_weight_sum - np_sum(weights ** 2))
        out[:] = sqrt(variance * bias_correction)
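A self-contained sketch of the same exponentially weighted standard deviation, assuming weights of the form decay_rate**(n-1-i) so the newest row receives the largest weight (the `weights` helper itself is not shown in this snippet; any constant rescaling of the weights leaves the result unchanged):

import numpy as np

def ewm_std(data, decay_rate):
    n = len(data)
    weights = decay_rate ** np.arange(n - 1, -1, -1)      # assumed weighting scheme
    mean = np.average(data, axis=0, weights=weights)
    variance = np.average((data - mean) ** 2, axis=0, weights=weights)
    squared_weight_sum = np.sum(weights) ** 2
    bias_correction = squared_weight_sum / (squared_weight_sum - np.sum(weights ** 2))
    return np.sqrt(variance * bias_correction)

print(ewm_std(np.array([[1.0], [2.0], [4.0], [8.0]]), 0.5))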
Example #7
    def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
        """Update CBOW model by training on a sequence of sentences.

        Called internally from :meth:`~gensim.models.fasttext.FastText.train`.

        Notes
        -----
        This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version
        from :mod:`gensim.models.fasttext_inner` instead.

        Parameters
        ----------
        model : :class:`~gensim.models.fasttext.FastText`
            Model instance.
        sentences : iterable of list of str
            Iterable of the sentences.
        alpha : float
            Learning rate.
        work : :class:`numpy.ndarray`, optional
            UNUSED.
        neu1 : :class:`numpy.ndarray`, optional
            UNUSED.

        Returns
        -------
        int
            Effective number of words trained.

        """
        result = 0
        for sentence in sentences:
            word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
                           model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
            for pos, word in enumerate(word_vocabs):
                reduced_window = model.random.randint(model.window)
                start = max(0, pos - model.window + reduced_window)
                window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
                word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]

                vocab_subwords_indices = []
                ngrams_subwords_indices = []

                for index in word2_indices:
                    vocab_subwords_indices += [index]
                    ngrams_subwords_indices.extend(model.wv.buckets_word[index])

                l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0)  # 1 x vector_size
                l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0)  # 1 x vector_size

                l1 = np_sum([l1_vocab, l1_ngrams], axis=0)
                subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices]
                if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean:
                    l1 /= (len(subwords_indices[0]) + len(subwords_indices[1]))

                # train on the sliding window for target word
                train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True)
            result += len(word_vocabs)
        return result
Example #8
    def compute(self, today, assets, out, data, decay_rate):
        weights = self.weights(len(data), decay_rate)

        mean = average(data, axis=0, weights=weights)
        variance = average((data - mean) ** 2, axis=0, weights=weights)

        squared_weight_sum = (np_sum(weights) ** 2)
        bias_correction = (
            squared_weight_sum / (squared_weight_sum - np_sum(weights ** 2))
        )
        out[:] = sqrt(variance * bias_correction)
Example #9
    def jaccard_distance(self):
        # NOTE: the inner helper below is never used in this snippet; the value actually
        # returned is the extended-Jaccard (Tanimoto) dissimilarity of `qlist` and `rlist`.
        def jaccard_similarity(list1, list2):

            intersection = len(list(set(list1).intersection(list2)))
            union = (len(list1) + len(list2)) - intersection
            return float(intersection) / union

        qlist = self.zero_filled_u_l[0]
        rlist = self.zero_filled_u_l[1]

        return np_sum(power(qlist - rlist, 2)) / (np_sum(power(
            qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist))
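A small numeric check of the returned quantity (one minus the Tanimoto similarity), using hypothetical vectors:

from numpy import array, power, sum as np_sum

qlist = array([1.0, 0.0, 2.0])
rlist = array([1.0, 1.0, 0.0])
d = np_sum(power(qlist - rlist, 2)) / (np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist))
# d == 5/6, i.e. 1 - (q . r) / (||q||^2 + ||r||^2 - q . r)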
Example #10
    def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None,
                          learn_doctags=True, learn_words=True, learn_hidden=True,
                          word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
        """
        Update distributed memory model ("PV-DM") by training on a single document.
        Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This
        method implements the DM model with a projection (input) layer that is
        either the sum or mean of the context vectors, depending on the model's
        `dm_mean` configuration field.  See `train_document_dm_concat()` for the DM
        model with a concatenated input layer.
        The document is provided as `doc_words`, a list of word tokens which are looked up
        in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
        into the doctag_vectors array.
        Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
        prevent learning-updates to those respective model weights, as if using the
        (partially-)frozen model to infer other compatible vectors.
        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.
        """
        if word_vectors is None:
            word_vectors = model.wv.syn0
        if word_locks is None:
            word_locks = model.syn0_lockf
        if doctag_vectors is None:
            doctag_vectors = model.docvecs.doctag_syn0
        if doctag_locks is None:
            doctag_locks = model.docvecs.doctag_syn0_lockf

        word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
                       model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]

        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indexes = [word2.index for pos2, word2 in window_pos if pos2 != pos]
            l1 = np_sum(word_vectors[word2_indexes], axis=0) + np_sum(doctag_vectors[doctag_indexes], axis=0)
            count = len(word2_indexes) + len(doctag_indexes)
            if model.cbow_mean and count > 1:
                l1 /= count
            neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha,
                                    learn_vectors=False, learn_hidden=learn_hidden)
            if not model.cbow_mean and count > 1:
                neu1e /= count
            if learn_doctags:
                for i in doctag_indexes:
                    doctag_vectors[i] += neu1e * doctag_locks[i]
            if learn_words:
                for i in word2_indexes:
                    word_vectors[i] += neu1e * word_locks[i]

        return len(word_vocabs)
Example #11
    def offspring(self, other):
        '''
        offspring takes two brains (parents) and returns one (child).
        The parents exchange some of their genes (weights and biases), and this makes a child.

        The child is the updated version of self; this will need to change in order to
        support more complex selection.
        '''

        _child = self.brain

        # decide how many weights you will take from other
        _n = random.randint(
            np_sum(_child.nodes) +
            1)  # "+1" is needed because of the definition of randint

        for __n in range(_n):
            '''
            remember that we represent weights as w^{l}_{ij}, with
            l: layer
            i the i^{th} node of layer l
            j the j^{th} node of layer l+1
            '''

            l = random.randint(_child.layers + 1)
            i = random.randint(_child.nodes[l])
            j = random.randint(_child.nodes[l + 1])

            #print '{},{},{}'.format(l,i,j)
            #print str(self.brain.weights[l][i][j]) +' <-- '+ str(other.brain.weights[l][i][j])

            _child.update_weight(l, i, j, other.brain.weights[l][i][j])

        #decide how many biases you will take from other
        # "+1" is not needed because the input biases are always 0 (ie there are total_nodes-1 biases)
        _n = random.randint(np_sum(_child.total_nodes))
        for __n in range(_n):
            '''
            remember that we represent biases as w^{l}_{i}, with
            l: layer
            i the i^{th} node of layer l
            '''

            l = random.randint(1,
                               _child.layers + 1)  # b^{0}_{i}=0 (can't change)
            i = random.randint(_child.nodes[l])

            #print '{},{}'.format(l,i)
            #print str(self.brain.biases[l][i]) +' <-- '+ str(other.brain.biases[l][i])

            _child.update_bias(l, i, other.brain.biases[l][i])
Example #12
def s2v_train(sentences, len_sentences, outer_vecs, max_seq_len, wv, weights):
    """Train sentence embedding on a list of sentences

    Called internally from :meth:`~fse.models.sentence2vec.Sentence2Vec.train`.

    Parameters
    ----------
    sentences : iterable of list of str
        The corpus used to train the model.
    len_sentences : int
        Length of the sentence iterable
    wv : :class:`~gensim.models.keyedvectors.BaseKeyedVectors`
        The BaseKeyedVectors instance containing the vectors used for training
    weights : np.ndarray
        Weights used in the summation of the vectors

    Returns
    -------
    np.ndarray
        The sentence embedding matrix of dim len(sentences) * vector_size
    int
        Number of words in the vocabulary actually used for training.
    int
        Number of sentences used for training.
    """
    size = wv.vector_size
    vlookup = wv.vocab

    w_trans = weights[:, None]

    output = empty((len_sentences, size), dtype=REAL)
    for i in range(len_sentences):
        output[i] = full(size, EPS, dtype=REAL)

    effective_words = 0
    effective_sentences = 0

    for i, s in enumerate(sentences):
        sentence_idx = [vlookup[w].index for w in s if w in vlookup]
        if len(sentence_idx):
            v = np_sum(outer_vecs[
                       i][1:min(max_seq_len, len(sentence_idx) + 1), :] *
                       w_trans[sentence_idx[:max_seq_len - 1]], axis=0)
            effective_words += len(sentence_idx)
            effective_sentences += 1
            v *= 1 / len(sentence_idx)
            v /= sqrt(np_sum(v.dot(v)))
            output[i] = v

    return output.astype(REAL), effective_words, effective_sentences
Example #13
    def train_sentence_dm(model,
                          sentence,
                          lbls,
                          alpha,
                          work=None,
                          neu1=None,
                          train_words=True,
                          train_lbls=True):
        """
        Update distributed memory model by training on a single sentence.

        The sentence is a list of Vocab objects (or None, where the corresponding
        word is not in the vocabulary). Called internally from `Doc2Vec.train()`.

        This is the non-optimized, Python version. If you have a C compiler, gensim
        will use the optimized version from doc2vec_inner instead.

        """
        lbl_indices = [lbl.index for lbl in lbls if lbl is not None]
        lbl_sum = np_sum(model.syn0[lbl_indices], axis=0)
        lbl_len = len(lbl_indices)
        neg_labels = []
        if model.negative:
            # precompute negative labels
            neg_labels = zeros(model.negative + 1)
            neg_labels[0] = 1.

        for pos, word in enumerate(sentence):
            if word is None:
                continue  # OOV word in the input sentence => skip
            reduced_window = random.randint(
                model.window)  # `b` in the original doc2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(
                sentence[start:pos + model.window + 1 - reduced_window], start)
            word2_indices = [
                word2.index for pos2, word2 in window_pos
                if (word2 is not None and pos2 != pos)
            ]
            l1 = np_sum(model.syn0[word2_indices],
                        axis=0) + lbl_sum  # 1 x layer1_size
            if word2_indices and model.cbow_mean:
                l1 /= (len(word2_indices) + lbl_len)
            neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha,
                                    neg_labels, train_words, train_words)
            if train_lbls:
                model.syn0[lbl_indices] += neu1e

        return len([word for word in sentence if word is not None])
Example #14
def array_kwargs_ones():
    """ ones(shape, dtype=float, order='C')
    """

    from numpy import sum as np_sum
    from numpy import ones

    n = 4

    a = ones((n, n - 1), 'float', 'C')
    b = ones((n + 1, 2 * n), float, order='F')
    c = ones((1, n), complex)
    d = ones(dtype=int, shape=2 + n)

    return np_sum(a) + np_sum(b) + np_sum(c) + np_sum(d)
Example #15
def norm_dist(distribution, smooth=True):
    """ Normalize distribution, and apply add-one smoothing to leave
    unused probability space.
    """
    global smoothing_parameter

    if smooth:
        add_one_smoothing = smoothing_parameter
        norming_factor = np_sum(distribution[:, 0] + add_one_smoothing)

        distribution[:, 0] = (distribution[:, 0] +
                              add_one_smoothing) / norming_factor
    else:
        distribution[:, 0] = distribution[:, 0] / np_sum(distribution[:, 0])
    return distribution
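A toy check, assuming `distribution` is an (N, 2) array whose first column holds counts and that the module-level `smoothing_parameter` has been set (here to 1.0 for illustration):

import numpy as np

smoothing_parameter = 1.0
dist = np.array([[3.0, 0], [1.0, 1], [0.0, 2]])
print(norm_dist(dist, smooth=True)[:, 0])  # (count + 1) / sum(count + 1) -> ~[0.571, 0.286, 0.143]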
Example #16
    def _narrowImg(src, width):
        # Narrow the image to `width` columns by repeatedly dropping the boundary
        # column (left or right) whose current edge has the larger column sum;
        # returns the resulting left offset.
        w = src.shape[1]
        l = 0
        r = w - 1
        suml = np_sum(src[:, l])
        sumr = np_sum(src[:, r])
        while w > width:
            if suml <= sumr:
                r -= 1
                sumr = np_sum(src[:, r])
            else:
                l += 1
                suml = np_sum(src[:, l])
            w -= 1
        return l
Example #17
def mmr_geometricmedian(X):
  ## Weiszfeld iteration: geometric median of the rows of X;
  ## returns (median, number of iterations used, last step change).
  (m,n)=X.shape
  u=mean(X,axis=0)
  niter=1000
  xeps=sqrt(np_sum(u**2))/1000
  xerr=2*xeps
  for i in range(niter):
    d2u=sqrt(np_sum((X-tile(u,(m,1)))**2,axis=1))
    inul=where(d2u<xeps)[0]
    d2u[inul]=xeps
    unext=np_sum(X/tile(d2u.reshape((m,1)),(1,n)),axis=0)/np_sum(ones(m)/d2u)
    if np_max(unext-u)<xerr:
      break
    u=copy(unext)
  return(unext,i,np_max(unext-u))
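Minimal usage sketch with hypothetical data (assumes the same numpy-style imports as the function above, e.g. mean, sqrt, tile, where, ones, copy, sum as np_sum, max as np_max); the geometric median is far less sensitive to outliers than the arithmetic mean.

import numpy as np

X = np.vstack([np.random.randn(100, 2), np.random.randn(5, 2) + 50.0])  # 5 gross outliers
u, n_iter, err = mmr_geometricmedian(X)
print(u, X.mean(axis=0))  # u stays near the origin; the mean is pulled toward the outliers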
Example #18
def comp_wind_sym(wind_mat):
    """Computes the winding pattern periodicity and symmetries

    Parameters
    ----------
    wind_mat : numpy.ndarray
        Matrix of the Winding

    Returns
    -------
    Nperw: int
        Number of electrical period of the winding

    """
    assert len(wind_mat.shape) == 4, "dim 4 expected for wind_mat"

    # Summing on all the layers (Nlay_r and Nlay_theta)
    wind_mat2 = squeeze(np_sum(np_sum(wind_mat, axis=1), axis=0))

    qs = wind_mat.shape[3]  # Number of phase
    Zs = wind_mat.shape[2]  # Number of Slot

    Nperw = 1  # Number of electrical period of the winding
    Nperslot = 1  # Periodicity of the winding in number of slots

    # Looking for the periodicity of each phase
    for q in range(0, qs):
        k = 1
        is_sym = False
        while k <= Zs and not is_sym:
            # We shift the array around the slot and check if it's the same
            if array_equal(wind_mat2[:, q], roll(wind_mat2[:, q], shift=k)):
                is_sym = True
            else:
                k += 1
        # least common multiple to find the common periodicity between the
        # different phases
        Nperslot = lcm(Nperslot, k)

    # If Nperslot > Zs no symmetry
    if Nperslot > 0 and Nperslot < Zs:
        # nb of periods of the winding (2 means 180°)
        Nperw = Zs / float(Nperslot)
        # if Zs cannot be divided by Nperslot (non integer)
        if Nperw % 1 != 0:
            Nperw = 1

    return int(Nperw)
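A toy check of the periodicity detection, assuming `wind_mat` has shape (Nlay_r, Nlay_theta, Zs, qs) and that the module-level imports used by the function (squeeze, np_sum, array_equal, roll, lcm) are available: a three-phase layout over Zs = 6 slots that repeats every 3 slots gives Nperw = 2.

import numpy as np

wind_mat = np.zeros((1, 1, 6, 3))
wind_mat[0, 0, :, 0] = [1, -1, 0, 1, -1, 0]
wind_mat[0, 0, :, 1] = [0, 1, -1, 0, 1, -1]
wind_mat[0, 0, :, 2] = [-1, 0, 1, -1, 0, 1]
print(comp_wind_sym(wind_mat))  # -> 2 (each phase pattern repeats every 3 of the 6 slots)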
Example #19
    def mutate(self):
        '''
        Mutate an individual.
        Mutate a random number of weights and biases
        '''
        _c = self.brain

        # decide how many weights to mutate
        _n = random.randint(
            np_sum(_c.nodes) +
            1)  # "+1" is needed because of the definition of randint

        for __n in range(_n):
            '''
            remember that we represent weights as w^{l}_{ij}, with
            l: layer
            i the i^{th} node of layer l
            j the j^{th} node of layer l+1
            '''

            l = random.randint(_c.layers + 1)
            i = random.randint(_c.nodes[l])
            j = random.randint(_c.nodes[l + 1])

            #print '{},{},{}'.format(l,i,j)

            _c.update_weight(l, i, j,
                             random.choice([-1, 1]) *
                             random.random())  # set a weight from -1 to 1

        # decide how many biases to mutate
        # "+1" is not needed because the input biases are always 0 (ie there are total_nodes-1 biases)
        _n = random.randint(np_sum(_c.total_nodes))
        for __n in range(_n):
            '''
            remember that we represent biases as w^{l}_{i}, with
            l: layer
            i the i^{th} node of layer l
            '''

            l = random.randint(1, _c.layers + 1)  # b^{0}_{i}=0 (can't change)
            i = random.randint(_c.nodes[l])

            print('{},{}'.format(l, i))

            _c.update_bias(l, i,
                           random.choice([-1, 1]) *
                           random.random())  # set a bias from -1 to 1
Example #20
    def preprocess_datasets(self, path_dataset, groupStage):
        PATH_DATA = os_path.join(path_dataset, "4")
        print("(INFO) EVALUATING DATASET ...")
        path_img = sorted(listdir(PATH_DATA))
        if path_img == []:
            return -1, -1
        num_img = len(path_img)

        # Histogram of all images in folder
        hChannel = []
        sChannel = []
        vChannel = []

        for image_path in path_img:
            img = imread(os_path.join(PATH_DATA, image_path))
            img = resize(img, (6000, 4000))
            img = img[500:-500, 750:-750, :]
            # HSV channel
            img = cvtColor(img, COLOR_BGR2HSV)
            # HSV histogram
            h = calcHist([img], [0], None, [256], [0, 256]).reshape(256, )
            s = calcHist([img], [1], None, [256], [0, 256]).reshape(256, )
            v = calcHist([img], [2], None, [256], [0, 256]).reshape(256, )

            hChannel.append(h)
            sChannel.append(s)
            vChannel.append(v)

        # Compute dissimilarity
        maxI = 0
        for i in range(num_img):
            one = []
            for j in range(num_img):
                c1 = np_sum(
                    np_absolute(hChannel[j] - hChannel[i])) / (HEIGHT * WIDTH)
                c2 = np_sum(
                    np_absolute(sChannel[j] - sChannel[i])) / (HEIGHT * WIDTH)
                c = (c1 + c2) / 2
                if c > maxI:
                    maxI = c
                    save = [i, j]

        img0 = path_img[save[0]]
        img1 = path_img[save[1]]

        imgSample1 = os_path.join(PATH_DATA, img0)
        imgSample2 = os_path.join(PATH_DATA, img1)
        return imgSample1, imgSample2
Example #21
    def log_score_per_ngram(self, corpus):
        """
        Given a corpus outside of the training data, finds the average ngram log probability of the corpus.

        :param corpus: String.  ASCII encoded corpus to score
        :return: average ngram log probability
        """
        probability_keys = self._get_padded_ngrams(corpus, self.highest_order)
        for i in range(0, len(probability_keys)):
            if (probability_keys[i][-1], ) not in self.vocab:
                probability_keys[i] = *probability_keys[i][:-1], "<unk>"

        sentence_probabilities = [
            self.ngram_probabilities.get(key) for key in probability_keys
        ]
        for i in range(0, len(sentence_probabilities)):

            # this is the case for completely unknown
            if sentence_probabilities[i] is None:
                sentence_probabilities[i] = log(self.av_unk_probability)

            else:
                sentence_probabilities[i] = log(sentence_probabilities[i])

        log_sum = np_sum(sentence_probabilities)
        all_ngrams = []
        all_ngrams.extend(ngrams(corpus.split(), self.highest_order))
        ngram_count = len(all_ngrams)
        if not ngram_count:
            print(
                "Error: Not enough ngrams.  Ensure that corpus contains at least as many words as the highest order"
            )
            return float("-inf")  # this case is impossible

        return log_sum / ngram_count
Example #22
def construct_local_load(element, shape_functions, quad_data, f):
    #
    # Set quad data
    x_quad = quad_data.x
    w_quad = quad_data.w

    #
    # Init empty matrix
    num_shape_functions = len(shape_functions)
    f_el = zeros((num_shape_functions, 1))

    #
    #
    x0 = element.x[0]
    xl = element.x[-1]

    #
    # Transform the quadrature points
    x_quad_transform = coord_transform(x_quad, x0, xl)

    #
    # Evaluate the functions at the quadrature points
    f_quad = f(x_quad_transform)
    psi_quad = [p.psi(p, x_quad) for p in shape_functions]

    for i in range(num_shape_functions):
        #
        # Perform quadrature
        f_el[i, 0] = ((xl - x0) / 2.) * np_sum(w_quad * (f_quad * psi_quad[i]))
    #
    return f_el
Example #23
    def train_sentence_cbow(model, sentence,context_vector, alpha, work=None, neu1=None):
        """
        Update CBOW model by training on a single sentence.

        The sentence is a list of string tokens, which are looked up in the model's
        vocab dictionary. Called internally from `word2mat.train()`.

        This is the non-optimized, Python version. If you have cython installed, gensim
        will use the optimized version from word2mat_inner instead.

        """
        word_vocabs = [model.vocab[w] for w in sentence if w in model.vocab and
                       model.vocab[w].sample_int > model.random.rand() * 2**32]
        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)  # `b` in the original word2mat code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            l1 = np_sum(model.syn0[word2_indices], axis=0)  # 1 x vector_size
            l1 = l1.reshape(model.topic_size,model.vector_size)
            l1 = l1.T.dot(context_vector)
            if word2_indices and model.cbow_mean:
                l1 /= len(word2_indices)
            train_cbow_pair(model, word, word2_indices,context_vector, l1, alpha)

        return len(word_vocabs)
Example #24
def _reduce_constraints(A, b):
    """ Make the constraint non-singular

    if the constraint is on the form:
    dot(A,x) = b
    A may be singular. to avoid this problem, we extract the
    non-singular part of the equation thanks to svd:
    A = U*S*Vh with U.T*U = I and Vh.T*Vh = I
    if r is the rank of A, we have:
    Ar = S[:r,:r]*Vh[:r,:]
    br = U[:,:r].T*b
    Hence:
    Ar*x = br
    """
    
    try:
        u, s, vh = svd(A, full_matrices=False)
        r = np_sum(where(s>1e-3, 1, 0)) # compute the rank of A
        ur, sr, vhr = u[:, :r], s[:r], vh[:r, :]
        Ar = dot(diag(sr), vhr)
        br = dot(ur.T, b)
    except (LinAlgError):
        Ar = A.copy()
        br = b.copy()
    return Ar, br
Example #25
def mmr_polypower_d(ndim, ndegree):

    ndegree = int(ndegree)
    ## number of terms  = \binomial(ndegree+ndim,ndim)
    nd = 1
    for i in range(ndim):
        nd *= (ndegree + i + 1) / (i + 1)
    nd = int(nd)

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = 1

    for i in range(nd):
        for j in range(ndim):
            if xpower[j] < ndegree - np_sum(xpower[j + 1 :]):
                xpower[j] += 1
                xpolydir[tuple(xpower)] = 1
                break
            else:
                xpower[j] = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    xpolypower = array(xpolylist)

    return xpolypower
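For intuition, a small worked case: in 2 dimensions with maximum degree 2 the function enumerates the exponent vectors of all monomials x1**a * x2**b with a + b <= 2 (6 = binomial(4, 2) rows), sorted lexicographically; the companion mmr_polyfeature_d builds the matching monomial columns in the same order.

print(mmr_polypower_d(2, 2))
# [[0 0]
#  [0 1]
#  [0 2]
#  [1 0]
#  [1 1]
#  [2 0]]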
Example #26
def mmr_polyfeature_d(xdata, ndegree):

    (m, ndim) = xdata.shape
    ndegree = int(ndegree)
    ## number of terms  = \binomial(ndegree+ndim,ndim)
    nd = 1
    for i in range(ndim):
        nd *= (ndegree + i + 1) / (i + 1)
    nd = int(nd)

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = ones(m)

    for i in range(nd):
        for j in range(ndim):
            if xpower[j] < ndegree - np_sum(xpower[j + 1 :]):
                xterm = xpolydir[tuple(xpower)]
                xpower[j] += 1
                xpolydir[tuple(xpower)] = xterm * xdata[:, j]
                break
            else:
                xpower[j] = 0

    xpolydata = zeros((m, nd))
    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()

    for i in range(nd):
        xpow = xpolylist[i]
        xpolydata[:, i] = xpolydir[xpow]

    return xpolydata
Example #27
def mmr_polypower_dn(ndim, maxdegree, ldegree):

    maxdegree = int(maxdegree)
    if len(ldegree) == 0:
        ldegree = [maxdegree] * ndim

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = 1

    istate = 1
    while istate == 1:
        for j in range(ndim):
            if xpower[j] < min(maxdegree - np_sum(xpower[j + 1 :]), ldegree[j]):
                xpower[j] += 1
                xpolydir[tuple(xpower)] = 1
                break
            else:
                if j < ndim - 1:
                    xpower[j] = 0
                else:
                    istate = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    xpolypower = array(xpolylist)

    return xpolypower
Example #28
def mmr_polyfeature_dn(xdata, maxdegree, ldegree):

    (m, ndim) = xdata.shape
    maxdegree = int(maxdegree)
    if len(ldegree) == 0:
        ldegree = [maxdegree for i in range(ndim)]

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = ones(m)

    istate = 1
    while istate == 1:
        for j in range(ndim):
            if xpower[j] < min(maxdegree - np_sum(xpower[j + 1 :]), ldegree[j]):
                xterm = xpolydir[tuple(xpower)]
                xpower[j] += 1
                xpolydir[tuple(xpower)] = xterm * xdata[:, j]
                break
            else:
                if j < ndim - 1:
                    xpower[j] = 0
                else:
                    istate = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    nd = len(xpolylist)
    xpolydata = zeros((m, nd))

    for i in range(nd):
        xpow = xpolylist[i]
        xpolydata[:, i] = xpolydir[xpow]

    return xpolydata
Example #29
def array_kwargs_full():
    """ full(shape, fill_value, dtype=None, order='C')
    """

    from numpy import sum as np_sum
    from numpy import full

    n = 3

    a = full((n, n - 1), 0.5, 'float', 'C')
    b = full((n + 1, 2 * n), 2.0, order='F')
    c = full((1, n), 3)
    d = full(2 + n, order='F', fill_value=5)
    e = full(dtype=int, fill_value=1.0, shape=2 * n)

    return np_sum(a) + np_sum(b) + np_sum(c) + np_sum(d) + np_sum(e)
Example #30
 def train_batch_labeled_cbow(model,
                              sentences,
                              alpha,
                              work=None,
                              neu1=None):
     result = 0
     for sentence in sentences:
         document, target = sentence
         word_vocabs = [
             model.wv.vocab[w] for w in document if w in model.wv.vocab
             and model.wv.vocab[w].sample_int > model.random.rand() * 2**32
         ]
         target_vocabs = [
             model.lvocab[t] for t in target if t in model.lvocab
         ]
         for target in target_vocabs:
             word2_indices = [w.index for w in word_vocabs]
             l1 = np_sum(model.wv.syn0[word2_indices],
                         axis=0)  # 1 x vector_size
             if word2_indices and model.cbow_mean:
                 l1 /= len(word2_indices)
             if model.softmax:
                 train_cbow_pair_softmax(model, target, word2_indices, l1,
                                         alpha)
             else:
                 train_cbow_pair(model, target, word2_indices, l1, alpha)
         result += len(word_vocabs)
     return result
Example #31
    def train_sentence_sg(model, sentence, context_vector,alpha, work=None,neu1=None):
        """
        Update skip-gram model by training on a single sentence.

        The sentence is a list of string tokens, which are looked up in the model's
        vocab dictionary. Called internally from `word2mat.train()`.

        This is the non-optimized, Python version. If you have cython installed, gensim
        will use the optimized version from word2mat_inner instead.

        """
        word_vocabs = [(model.vocab[w],t) for w,t in sentence if w in model.vocab and
                       model.vocab[w].sample_int > model.random.rand() * 2**32]
        for pos, item in enumerate(word_vocabs):
            word,topic = item
            reduced_window = model.random.randint(model.window)  # `b` in the original word2mat code
            topic_start = max(0, pos - model.topic_window)
            for i in xrange(model.topic_size):
                context_vector[i] = 0.

            for pos2,item2 in enumerate(word_vocabs[topic_start:(pos+model.topic_window+1)],topic_start):
                word2,topic2 =item2
                context_vector[topic2] += 1
            context_vector = context_vector / np_sum(context_vector)
            # now go over all words from the (reduced) window, predicting each one in turn
            start = max(0, pos - model.window + reduced_window)
            for pos2, item2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start):
                word2,topic2 =  item2
                # don't train on the `word` itself
                if pos2 != pos:
                    train_sg_pair(model, model.index2word[word2.index], word.index,context_vector, alpha)

        return len(word_vocabs)
Example #32
    def train_sentence_fastsent(model, sentences, alpha, work=None, neu1=None):
        """
        Update parameters based on three consecutive sentences from the training data
        model: the model object
        sentences: an ordered list of three sentences as lists of words
        alpha: the learning rate
        """

        current_sent = sentences[1]
        if model.autoencode:
            context_sents = sentences[0] + sentences[1] + sentences[2]
        else:
            context_sents = sentences[0] + sentences[2]
        word_vocabs = [
            model.vocab[w] for w in current_sent if w in model.vocab
            and model.vocab[w].sample_int > model.random.rand() * 2**32
        ]
        context_vocabs = [
            model.vocab[w] for w in context_sents if w in model.vocab
            and model.vocab[w].sample_int > model.random.rand() * 2**32
        ]
        word2_indices = [word.index for word in word_vocabs]
        l1 = np_sum(model.syn0[word2_indices], axis=0)  # 1 x vector_size
        if word2_indices and model.fastsent_mean:
            l1 /= len(word2_indices)
        for word in context_vocabs:
            train_fastsent_pair(model, word, word2_indices, l1, alpha)
        return len(context_vocabs)
Example #33
def wmd(document1, document2, model):
    # Remove out-of-vocabulary words.
    document1 = [token for token in document1 if token in model]
    document2 = [token for token in document2 if token in model]
    if len(document1) == 0 or len(document2) == 0:
        return 1.
    dictionary = Dictionary(documents=[document1, document2])
    vocab_len = len(dictionary)
    # Compute distance matrix.
    distance_matrix = zeros((vocab_len, vocab_len), dtype=double)
    for i, t1 in list(dictionary.items()):
        for j, t2 in list(dictionary.items()):
            distance_matrix[i, j] = scipy.spatial.distance.cosine(
                model[t1], model[t2])
    if np_sum(distance_matrix) == 0.0:
        # `emd` gets stuck if the distance matrix contains only zeros.
        return 0.

    def nbow(document):
        d = zeros(vocab_len, dtype=double)
        nbow = dictionary.doc2bow(document)  # Word frequencies.
        doc_len = len(document)
        for idx, freq in nbow:
            d[idx] = freq / float(doc_len)  # Normalized word frequencies.
        return d

    # Compute nBOW representation of documents.
    d1 = nbow(document1)
    d2 = nbow(document2)
    # Compute WMD.
    res = emd(d1, d2, distance_matrix)
    return res if res >= 0 else 1
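Hypothetical usage sketch: any mapping supporting `token in model` and `model[token]` works as the word-vector source (for example a gensim KeyedVectors), and the function's own module-level imports (Dictionary, emd from pyemd, scipy, zeros, double, np_sum) are assumed; the toy dict below is purely illustrative.

import numpy as np

rng = np.random.default_rng(0)
vocab = "obama speaks media illinois president greets press chicago the to in".split()
model = {w: rng.normal(size=8) for w in vocab}   # stand-in for real word vectors

doc_a = "obama speaks to the media in illinois".split()
doc_b = "the president greets the press in chicago".split()
print(wmd(doc_a, doc_b, model))  # smaller values indicate more similar documents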
Example #34
def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Word2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2vec_inner instead.

    """
    labels = []
    if model.negative:
        # precompute negative labels
        labels = zeros(model.negative + 1)
        labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(
            model.window)  # `b` in the original word2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(
            sentence[start:pos + model.window + 1 - reduced_window], start)
        word2_indices = [
            word2.index for pos2, word2 in window_pos
            if (word2 is not None and pos2 != pos)
        ]
        l1 = np_sum(model.syn0[word2_indices], axis=0)  # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= len(word2_indices)
        train_cbow_pair(model, word, word2_indices, l1, alpha, labels)

    return len([word for word in sentence if word is not None])
Example #35
def get_field(self, axes_list):
    """Returns the values of the field (with symmetries and sums).
    Parameters
    ----------
    self: Data
        a Data object
    axes_list: list
        a list of RequestedAxis objects
    Returns
    -------
    values: ndarray
        values of the field
    """

    values = self.values
    for axis_requested in axes_list:
        # Rebuild symmetries only for fft case
        axis_symmetries = self.axes[axis_requested.index].symmetries
        if (axis_requested.transform == "fft"
                and "antiperiod" in axis_symmetries):
            nper = axis_symmetries["antiperiod"]
            axis_symmetries["antiperiod"] = 2
            values = rebuild_symmetries(values, axis_requested.index,
                                        axis_symmetries)
            axis_symmetries["antiperiod"] = nper

        # Sum over sum axes
        if axis_requested.extension == "sum":
            values = np_sum(values, axis=axis_requested.index)
    return values
Example #36
def mmr_polypower_dn(ndim,maxdegree,ldegree):

  maxdegree=int(maxdegree)
  if len(ldegree)==0:
    ldegree=[maxdegree]*ndim

  xpolydir={}
  xpower=zeros(ndim,dtype=int)
  xpolydir[tuple(xpower)]=1

  istate=1
  while istate==1:
    for j in range(ndim):
      if xpower[j]<min(maxdegree-np_sum(xpower[j+1:]),ldegree[j]):
        xpower[j]+=1
        xpolydir[tuple(xpower)]=1
        break
      else:
        if j<ndim-1:
          xpower[j]=0
        else:
          istate=0
        
  xpolylist=[  xpow for xpow  in xpolydir.keys()]
  xpolylist.sort()
  xpolypower=array(xpolylist)


  return(xpolypower)
Example #37
def compute_semantic_distance_matrix(model, noun_freq_polar1_terms,
                                     noun_freq_polar2_terms, dictionary,
                                     filename1, filename2):
    # Dictionary is doc1 terms * doc2 terms and
    # distance matrix is ((doc1 terms + doc2 terms) * (doc1 terms + doc2 terms))
    # This dimension of matrix is required for Earth Mover distance computation

    vocab_len = len(dictionary)

    docset1 = set(noun_freq_polar1_terms)
    docset2 = set(noun_freq_polar2_terms)

    distance_matrix = np.full((vocab_len, vocab_len), 0.0)

    for i, t1 in dictionary.items():
        for j, t2 in dictionary.items():
            if t1 not in docset1 or t2 not in docset2:
                continue

            if t1 == t2 and model.strategy != "doc2vec":
                distance_matrix[i, j] = 0.00001
                continue

            distance_matrix[i, j] = model.compute_semantic_distance(
                t1, t2, "cosine")

    if np_sum(distance_matrix) == 0.0:
        print('The distance matrix is all zeros.')
        return None

    return distance_matrix
Example #38
def mmr_polyfeature_dn(xdata,maxdegree,ldegree):

  (m,ndim)=xdata.shape
  maxdegree=int(maxdegree)
  if len(ldegree)==0:
    ldegree=[ maxdegree for i in range(ndim)]

  xpolydir={}
  xpower=zeros(ndim,dtype=int)
  xpolydir[tuple(xpower)]=ones(m)

  istate=1
  while istate==1:
    for j in range(ndim):
      if xpower[j]<min(maxdegree-np_sum(xpower[j+1:]),ldegree[j]):
        xterm=xpolydir[tuple(xpower)]
        xpower[j]+=1
        xpolydir[tuple(xpower)]=xterm*xdata[:,j]
        break
      else:
        if j<ndim-1:
          xpower[j]=0
        else:
          istate=0
        
  xpolylist=[ xpow for xpow  in xpolydir.keys()]
  xpolylist.sort()
  nd=len(xpolylist)
  xpolydata=zeros((m,nd))

  for i in range(nd):
    xpow=xpolylist[i]
    xpolydata[:,i]=xpolydir[xpow]

  return(xpolydata)
Example #39
def mmr_polyfeature_d(xdata,ndegree):

  (m,ndim)=xdata.shape
  ndegree=int(ndegree)
## number of terms  = \binomial(ndegree+ndim,ndim)
  nd=1
  for i in range(ndim):
    nd*=(ndegree+i+1)/(i+1)
  nd=int(nd)

  xpolydir={}
  xpower=zeros(ndim,dtype=int)
  xpolydir[tuple(xpower)]=ones(m)
 
  for i in range(nd):
    for j in range(ndim):
      if xpower[j]<ndegree-np_sum(xpower[j+1:]):
        xterm=xpolydir[tuple(xpower)]
        xpower[j]+=1
        xpolydir[tuple(xpower)]=xterm*xdata[:,j]
        break
      else:
        xpower[j]=0
        
  xpolydata=zeros((m,nd))
  xpolylist=[  xpow for xpow  in xpolydir.keys()]
  xpolylist.sort()

  for i in range(nd):
    xpow=xpolylist[i]
    xpolydata[:,i]=xpolydir[xpow]

  return(xpolydata)
Example #40
    def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None):
        """
        Update CBOW model by training on a single sentence.

        The sentence is a list of Vocab objects (or None, where the corresponding
        word is not in the vocabulary). Called internally from `Word2Vec.train()`.

        This is the non-optimized, Python version. If you have cython installed, gensim
        will use the optimized version from word2vec_inner instead.

        """
        labels = []
        if model.negative:
            # precompute negative labels
            labels = zeros(model.negative + 1)
            labels[0] = 1.

        for pos, word in enumerate(sentence):
            if word is None:
                continue  # OOV word in the input sentence => skip
            reduced_window = random.randint(model.window) # `b` in the original word2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size
            if word2_indices and model.cbow_mean:
                l1 /= len(word2_indices)
            train_cbow_pair(model, word, word2_indices, l1, alpha, labels)

        return len([word for word in sentence if word is not None])
Example #41
def rmsle(actual, predicted):
    """ Root mean squared logarithmic error.
    """
    actual, predicted = _preformat_inputs(actual, predicted)
    count_of = predicted.shape[0]
    square_logarithm_difference = log((actual + 1) / (predicted + 1)) ** 2
    return sqrt((1 / count_of) * np_sum(square_logarithm_difference))
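A quick check of the same formula with plain numpy (hypothetical inputs; `_preformat_inputs` is assumed to return arrays of equal shape). Note that log((a+1)/(p+1)) squared equals the usual (log1p(p) - log1p(a))**2 term.

import numpy as np

actual = np.array([3.0, 5.0, 2.5, 7.0])
predicted = np.array([2.5, 5.0, 4.0, 8.0])
rmsle_value = np.sqrt(np.mean(np.log((actual + 1) / (predicted + 1)) ** 2))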
Example #42
def area_and_convexity_single_batch(data_mb,
                                    num_constraints=None,
                                    area=None,
                                    polygons_number=None,
                                    img_width=None,
                                    img_height=None,
                                    **kwargs):
    # preallocate an output array that will contain the computed
    # constraints values for the batch
    mb_constraints_values = np_empty(shape=(data_mb.shape[0], num_constraints),
                                     dtype=np_float32)

    for i in prange(data_mb.shape[0]):
        sample = data_mb[i]
        # preallocate an output array that will contain the computed
        # constraints value for the i-th element
        constraints = np_empty(shape=(num_constraints, ), dtype=np_float32)

        target_area = area * polygons_number
        nonzero = np_sum(sample)
        norm = img_width * img_height - target_area
        greater_area_inner = min(1, max(0, nonzero - target_area) / norm)
        smaller_area_inner = min(1, max(0, target_area - nonzero) / norm)
        constraints[0] = greater_area_inner
        constraints[1] = smaller_area_inner
        # convexity
        constraints[2] = _convex(sample, img_width, img_height)
        mb_constraints_values[i] = constraints
    return mb_constraints_values
Example #43
def get_field(self, axes_list):
    """Returns the values of the field (with symmetries and sums).
    Parameters
    ----------
    self: Data
        a Data object
    axes_list: list
        a list of RequestedAxis objects
    Returns
    -------
    values: ndarray
        values of the field
    """

    values = self.values
    for axis_requested in axes_list:
        # Rebuild symmetries only for fft case
        if (
            axis_requested.transform == "fft"
            and axis_requested.corr_name in self.symmetries.keys()
        ):
            if "antiperiod" in self.symmetries.get(axis_requested.corr_name):
                values = self.rebuild_symmetries(
                    values,
                    axis_requested.corr_name,
                    axis_requested.index,
                    is_antiperiod=True,
                )

        # Sum over sum axes
        if axis_requested.extension == "sum":
            values = np_sum(values, axis=axis_requested.index)
    return values
Example #44
def mmr_polypower_d(ndim,ndegree):

  ndegree=int(ndegree)
## number of terms  = \binomial(ndegree+ndim,ndim)
  nd=1
  for i in range(ndim):
    nd*=(ndegree+i+1)/(i+1)
  nd=int(nd)

  xpolydir={}
  xpower=zeros(ndim,dtype=int)
  xpolydir[tuple(xpower)]=1
 
  for i in range(nd):
    for j in range(ndim):
      if xpower[j]<ndegree-np_sum(xpower[j+1:]):
        xpower[j]+=1
        xpolydir[tuple(xpower)]=1
        break
      else:
        xpower[j]=0
        
  xpolylist=[  xpow for xpow  in xpolydir.keys()]
  xpolylist.sort()
  xpolypower=array(xpolylist)


  return(xpolypower)
Example #45
def _reduce_constraints(A, b):
    """ Make the constraint non-singular

    if the constraint is on the form:
    dot(A,x) = b
    A may be singular. to avoid this problem, we extract the
    non-singular part of the equation thanks to svd:
    A = U*S*Vh with U.T*U = I and Vh.T*Vh = I
    if r is the rank of A, we have:
    Ar = S[:r,:r]*Vh[:r,:]
    br = U[:,:r].T*b
    Hence:
    Ar*x = br
    """

    try:
        u, s, vh = svd(A, full_matrices=False)
        r = np_sum(where(s > 1e-3, 1, 0))  # compute the rank of A
        ur, sr, vhr = u[:, :r], s[:r], vh[:r, :]
        Ar = dot(diag(sr), vhr)
        br = dot(ur.T, b)
    except (LinAlgError):
        Ar = A.copy()
        br = b.copy()
    return Ar, br
Example #46
    def coding_bases(self, seq_id):
        """Calculate number of coding bases in sequence."""

        # check if sequence has any genes
        if seq_id not in self.genes:
            return 0

        return np_sum(self.coding_mask[seq_id])
Example #47
def kullback_leibler(actual, predicted):
    """ Kullback-Leibler error.
    """
    actual, predicted = _preformat_inputs(actual, predicted)
    count_of_inputs = actual.shape[0]
    return (1. / count_of_inputs) * np_sum(
        predicted * log(predicted / actual) +
        (1 - predicted) * log((1 - predicted) / (1 - actual))
    )
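Numeric sanity check, assuming `actual` and `predicted` are element-wise Bernoulli probabilities strictly between 0 and 1; the returned value is the mean of the per-element Bernoulli divergences KL(predicted_i || actual_i).

import numpy as np

actual = np.array([0.5, 0.5, 0.9])
predicted = np.array([0.3, 0.6, 0.9])
kl = np.mean(predicted * np.log(predicted / actual)
             + (1 - predicted) * np.log((1 - predicted) / (1 - actual)))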
Example #48
def train_cat_vec_cbow_pp(model, sent_vec, cat_vec, sentence, alpha, work=None, neu1=None, sent_vec_grad=None, cat_vec_grad=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Sent2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2vec_inner instead.

    """
    w2vmodel = model.w2v
    if model.negative:
        # precompute negative labels
        labels = zeros(model.negative + 1)
        labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window) # `b` in the original word2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(w2vmodel.syn0[word2_indices], axis=0) # 1 x layer1_size
        l1 += sent_vec + cat_vec
        if word2_indices and model.cbow_mean:
            l1 /= (len(word2_indices) + 1) ##modified by jmarui
        neu1e = zeros(l1.shape)

        if model.hs:
            l2a = w2vmodel.syn1[word.point] # 2d matrix, codelen x layer1_size
            fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output
            ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate
            if model.word_learn == 1: w2vmodel.syn1[word.point] += outer(ga, l1) # learn hidden -> output
            neu1e += dot(ga, l2a) # save error

        if model.negative:
            # use this word (label = 1) + `negative` other random words not from this sentence (label = 0)
            word_indices = [word.index]
            while len(word_indices) < model.negative + 1:
                w = w2vmodel.table[random.randint(w2vmodel.table.shape[0])]
                if w != word.index:
                    word_indices.append(w)
            l2b = w2vmodel.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size
            fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output
            gb = (labels - fb) * alpha # vector of error gradients multiplied by the learning rate
            if model.word_learn == 1: w2vmodel.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output
            neu1e += dot(gb, l2b) # save error

        if model.word_learn == 1: w2vmodel.syn0[word2_indices] += neu1e # learn input -> hidden, here for all words in the window separately
        sent_vec += neu1e # learn input -> hidden, here for all words in the window separately
        if model.cat_learn == 1: cat_vec += neu1e # learn input -> hidden, here for all words in the window separately

    return len([word for word in sentence if word is not None])
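# Standalone sketch of the negative-sampling step used above (toy sizes, random
# data, plain numpy; `k`, `alpha` and the variable names are illustrative and
# not part of the gensim API).
import numpy as np

rng = np.random.default_rng(0)
layer1_size, k, alpha = 4, 2, 0.025          # vector size, negatives, learning rate

l1 = rng.normal(size=layer1_size)            # summed context (+ sentence/category) vector
l2b = rng.normal(size=(k + 1, layer1_size))  # row 0: target word, rows 1..k: negatives
labels = np.zeros(k + 1)
labels[0] = 1.                               # only the true target is labeled 1

fb = 1. / (1. + np.exp(-(l2b @ l1)))         # propagate hidden -> output (sigmoid)
gb = (labels - fb) * alpha                   # error gradients times the learning rate
neu1e = gb @ l2b                             # error to push back to the input vectors
l2b += np.outer(gb, l1)                      # update the output-side vectors (syn1neg rows)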
def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Word2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2vec_inner instead.

    """

    labels = []
    if model.negative:
        # precompute negative labels
        labels = zeros(model.negative + 1)
        labels[0] = 1.

    for pos, word in enumerate(sentence):
        
        if word is None:
            continue  # OOV word in the input sentence => skip
        
        #reduced_window = random.randint(model.window) # `b` in the original word2vec code
        #start = max(0, pos - model.window + reduced_window)
        start = max(0, pos - model.window)
        #window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        window_pos = enumerate(sentence[start : pos + 1], start)
        #window_pos = enumerate(sentence[start : pos + model.window + 1], start)
        word2_indices_tmp = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        word2_indices = []
        
        for w2_i in range(0, len(word2_indices_tmp)):
            w2_index = word2_indices_tmp[w2_i]
            name = model.index2word[w2_index]

            if model.context_labeling:
                # fold the in-window index back into [0, window)
                if w2_i >= model.window:
                    w2_i = w2_i - model.window

                # look up the position-labeled copy of the context word, so the same
                # word at a different offset maps onto a distinct input vector
                labeled_name = "LabCon_" + str(name) + "_" + str(w2_i)

                vocab_obj = model.vocab[labeled_name]
                word2_indices.append(vocab_obj.index)
            else:
                vocab_obj = model.vocab[name]
                word2_indices.append(vocab_obj.index)
            
        
        l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= len(word2_indices)
        train_cbow_pair(model, word, word2_indices, l1, alpha, labels)

    return len([word for word in sentence if word is not None])
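# Tiny illustration of the positional context labels built above: the "LabCon_"
# prefix and the offset folding mirror the code, the words are made up.
window = 3
context_words = ["the", "quick", "brown", "fox"]

labeled = []
for i, name in enumerate(context_words):
    offset = i - window if i >= window else i   # fold the index back into [0, window)
    labeled.append("LabCon_" + name + "_" + str(offset))

print(labeled)   # ['LabCon_the_0', 'LabCon_quick_1', 'LabCon_brown_2', 'LabCon_fox_0']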
Example #50
0
def cross_entropy_error(actual, predicted, epsilon=1e-10):
    """ Cross entropy error.
    """
    actual, predicted = _preformat_inputs(actual, predicted)
    count_of_inputs = actual.shape[0]
    return -(1. / count_of_inputs) * np_sum(
        predicted * log(actual + epsilon) +
        (1 - predicted) * log(1 - actual + epsilon)
    )
Example #51
0
    def get_dNr_psi_w_mtx(self,r_pnt, node_ls_values, r_ls_value):
        '''
        Return the derivatives of the shape functions
        '''
        #print "in dN ",r_pnt
        p_N_mtx = self.parent_fets.get_N_mtx(r_pnt)
        p_dNr_mtx = self.get_dNr_mtx(r_pnt)

        p_N_red = vstack((p_N_mtx[2,2::4],p_N_mtx[3,3::4]))
        
        second = np_sum((abs(node_ls_values) * p_dNr_mtx), axis = -1)
        
        third = np_sum((p_N_red[0] * abs(node_ls_values)))
        
        fourth = np_sum((p_dNr_mtx * node_ls_values), axis = -1)
        
        A_mtx = p_N_red * ( second - sign(r_ls_value)*fourth)[:,None]
        #A_mtx = p_N_red * ( -1.* sign(r_ls_value))
        B_mtx = p_dNr_mtx * (third - abs(r_ls_value))
        dNr_e_mtx = A_mtx + B_mtx
        return dNr_e_mtx
 def train_online(self, sentence, epoch = 20):
     #a deterministic seed for each sentence
     s1 = ' '.join(sentence[:10])[:10]
     #logger.info("online training for a single sentence '%s'" % s1)
     #start = time.time()
     #drop words that do not occur in the vocabulary
     sentence = filter(lambda x: x in self.vocab, sentence)
     
     #generate a small random document vector for the unseen review,
     #seeded deterministically from the start of the sentence
     random.seed(uint32(self.hashfxn(s1[:10] + str(self.seed))))
     doc_vec = (random.rand(self.layer1_size) - 0.5) / self.layer1_size
     alpha = self.alpha
     #logger.info("before training %s", doc_vec)
     for _ in xrange(epoch):
         #the code below is adapted from train_sentence_dbow and train_sg_pair 
         #logger.info("epoch %d" % i)
         if self.sg:
             for w in sentence:
                 word = self.vocab[w]
                 neu1e = zeros(doc_vec.shape)
                 # work on the entire tree at once, to push as much work into numpy's C routines as possible (performance)
                 l2a = deepcopy(self.syn1[word.point])  # 2d matrix, codelen x layer1_size
                 fa = expit(dot(doc_vec, l2a.T))  # propagate hidden -> output
                 ga = (1 - word.code - fa) * alpha  # vector of error gradients multiplied by the learning rate
                 neu1e += dot(ga, l2a)  # save error
                 doc_vec += neu1e 
         else:
             #the code below is adapted from train_sentence_dm and train_cbow_pair
             for pos, w in enumerate(sentence):
                 word = self.vocab[w]
                 reduced_window = random.randint(self.window)  # `b` in the original doc2vec code
                 start = max(0, pos - self.window + reduced_window)
                 window_pos = enumerate(sentence[start : pos + self.window + 1 - reduced_window], start)
                 word2_indices = [self.vocab[word2].index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
                 l1 = np_sum(self.syn0[word2_indices], axis=0) + doc_vec  # 1 x layer1_size
                 if word2_indices and self.cbow_mean:
                     l1 /= (len(word2_indices) + 1)
                 neu1e = zeros(l1.shape)
                 l2a = self.syn1[word.point] # 2d matrix, codelen x layer1_size
                 #use scipy.special.expit to avoid overflow/underflow
                 fa = expit(dot(l1, l2a.T)) # propagate hidden -> output
                 ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate
                 neu1e += dot(ga, l2a) # save error
                 doc_vec += neu1e
         
     #logger.info("after training %s", doc_vec)
     #elapsed = time.time() - start
     #logger.info("training 1 sentence took %.1fs" % elapsed)
     return doc_vec
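# Hypothetical usage of the online inference routine above: `model` is assumed
# to be an already-trained instance of the surrounding class.
tokens = "this movie was surprisingly good".split()
doc_vec = model.train_online(tokens, epoch=10)
print(doc_vec.shape)   # (layer1_size,)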
Example #53
0
def normilize_error_output(output):
    """ Normalize error output when result is non-scalar.

    Parameters
    ----------
    output : array-like
        Input can be any numpy array or matrix.

    Returns
    -------
    int, float
        Return sum of all absolute values.
    """
    return np_sum(np_abs(output))
def train_cbow_pair(model, word, word2_indices, l1, alpha, labels, train_w1=True, train_w2=True):
    neu1e = zeros(l1.shape)
    
    if model.hs:

        if len(word2_indices) >= 1:
            # recompute l1 from the context words, overriding the value passed in
            l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size
        
            if word2_indices and model.cbow_mean:
                l1 /= len(word2_indices)
                
            l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size
            fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output
            ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate

            if train_w1:
                model.syn1[word.point] += outer(ga, l1) # learn hidden -> output
            neu1e += dot(ga, l2a) # save error
 
        else:
            l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size
            fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output
            ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate
            
            if train_w1:
                model.syn1[word.point] += outer(ga, l1) # learn hidden -> output
            neu1e += dot(ga, l2a) # save error

    if model.negative:
        # use this word (label = 1) + `negative` other random words not from this sentence (label = 0)
        word_indices = [word.index]
        while len(word_indices) < model.negative + 1:
            w = model.table[random.randint(model.table.shape[0])]
            if w != word.index:
                word_indices.append(w)
        l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size
        fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output
        gb = (labels - fb) * alpha # vector of error gradients multiplied by the learning rate

        if train_w1:
            model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output
        neu1e += dot(gb, l2b) # save error
        
    if train_w2:
        model.syn0[word2_indices] += neu1e # learn input -> hidden, here for all words in the window separately
    
    return neu1e
Example #55
0
    def train_epoch(self, input_train, target_train):
        centers = self.centers
        old_centers = centers.copy()
        output_train = self.predict(input_train)

        for i, center in enumerate(centers):
            positions = argwhere(output_train[:, 0] == i)

            if positions.size == 0:
                # no samples assigned to this center (np_any would wrongly skip index 0)
                continue

            class_data = take(input_train, positions, axis=0)
            # move the center to the mean of its assigned samples
            centers[i, :] = (1. / len(class_data)) * np_sum(class_data, axis=0)

        return np_abs(old_centers - centers)
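# The update above is the usual "move each center to the mean of its assigned
# samples" step; a minimal standalone illustration with made-up data:
import numpy as np

input_train = np.array([[0.0, 0.0], [0.2, 0.1], [5.0, 5.0]])
assignments = np.array([0, 0, 1])            # winning center index per sample

center_0 = input_train[assignments == 0].mean(axis=0)   # -> array([0.1 , 0.05])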
Example #56
0
def score_document_labeled_cbow(model, document, labels=None, work=None, neu1=None):

    word_vocabs = [model.wv.vocab[w] for w in document if w in model.wv.vocab]

    if labels is not None:
        targets = [model.lvocab[label] for label in labels]
    else:
        targets = model.lvocab.values()
        labels = model.lvocab.keys()

    word2_indices = [word2.index for word2 in word_vocabs]
    l1 = np_sum(model.wv.syn0[word2_indices], axis=0)  # 1 x layer1_size
    if word2_indices and model.cbow_mean:
        l1 /= len(word2_indices)
    return zip(labels, score_cbow_labeled_pair(model, targets, l1))
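# Hypothetical usage: score a tokenized document against every known label and
# pick the best one (`model` is assumed to expose `wv` and `lvocab` as above).
scores = dict(score_document_labeled_cbow(model, "great plot and acting".split()))
best_label = max(scores, key=scores.get)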
Example #57
0
    def train_epoch(self, input_data, target_train):
        weights = self.weights

        # project the input onto the weights and reconstruct it
        minimized = dot(input_data, weights)
        reconstruct = dot(minimized, weights.T)
        error = input_data - reconstruct

        # gradient step on the reconstruction error (projection treated as fixed)
        weights += self.step * dot(error.T, minimized)

        # mean absolute reconstruction error over all matrix entries
        mae = np_sum(np_abs(error)) / input_data.size

        return mae
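# One epoch of the same linear reconstruction update on made-up data, written
# as a standalone sketch with plain numpy:
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))                 # input_data
W = rng.normal(scale=0.1, size=(5, 2))        # weights
step = 0.01

minimized = X @ W
error = X - minimized @ W.T
W += step * error.T @ minimized               # same update as train_epoch above
mae = np.abs(error).sum() / X.size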
Example #58
0
 def recognize(self, image):
   mem = self.mem
   converg = 0
   result_img = copy(image)
   # run up to 8 synchronous recall passes over the stored pattern memory
   for idx in range(8):
     pred_img = copy(result_img)
     col = 0
     for idx1 in range(self.im_size_sq):
       assoc = np_sum(pred_img * mem[idx1])
       result_img[idx1] = 1 if neuro_tools.sign(assoc) else -1
       if pred_img[idx1] == result_img[idx1]:
         col += 1
       converg += abs(pred_img[idx1] - result_img[idx1])
     # a full pass with no pixel changes means the recall has converged
     if col == self.im_size_sq:
       return converg / (self.img_in_memory ** .5)
   return sys.float_info.max