Example #1
# Imports assumed at module level in the original source
import os
import numpy as np


def context_fn(ind_sent_list):

    # The chunk index and its list of sentences arrive packed as one tuple
    index = ind_sent_list[0]
    sent_list = ind_sent_list[1]

    # Shared state is attached to the function object by the caller
    env_matrix = context_fn.env_matrix

    temp_dir = context_fn.temp_dir

    # Very slow in SciPy v. 0.7.2. Same for dok_matrix.
    # mem_matrix = lil_matrix(env_matrix.shape)

    # Occupies too much memory
    # mem_matrix = np.zeros(env_matrix.shape, dtype=np.float32)

    # So, using dictionary as temporary sparse matrix
    mem_matrix = dict()


    print 'Training on chunk of sentences', index

    for sent in sent_list:

        for i,word in enumerate(sent):

            if word not in mem_matrix:
                mem_matrix[word] = np.zeros(env_matrix.shape[1], dtype=np.float32)

            # Left context
            for ctxword in sent[:i]:

                mem_matrix[word] += env_matrix[ctxword,:]

                # mem_matrix[word,:] += env_matrix[ctxword,:]

            # Right context
            for ctxword in sent[i+1:]:

                mem_matrix[word] += env_matrix[ctxword,:]
                
                # mem_matrix[word,:] += env_matrix[ctxword,:]


    print 'Chunk of sentences', index
    # print mem_matrix
                    
    tmp_file =\
        os.path.join(temp_dir, 'context-' + str(index) + '.tmp.npy')

    print 'Dumping to temp file\n'\
          '  ', tmp_file
    
    dump_matrix(mem_matrix, tmp_file)
    
    return tmp_file
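The chunk index and its sentences arrive packed in one tuple, and the shared state is read from attributes of context_fn itself, which points to the function being mapped over chunks by a worker pool created after those attributes are set. A minimal driver sketch under that assumption (the sizes, sentences, and paths below are made up, and words are represented as integer row indices into env_matrix):

# Hypothetical driver for context_fn; not part of the original examples.
import multiprocessing as mp
import numpy as np

vocab_size, dimension = 1000, 256
sentences = [[0, 5, 9], [2, 5], [7, 1, 3, 5]]      # sentences as word-index lists
chunks = [(0, sentences[:2]), (1, sentences[2:])]  # (index, sentence-chunk) pairs

context_fn.env_matrix = np.random.rand(vocab_size, dimension).astype(np.float32)
context_fn.temp_dir = '/tmp'

pool = mp.Pool()
tmp_files = pool.map(context_fn, chunks)
pool.close()
pool.join()

With a fork-based multiprocessing.Pool, attributes set on context_fn in the parent process are visible in the workers, which is presumably why the shared matrices are passed this way rather than as extra arguments.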
Example #2
# Imports assumed at module level in the original source
import os
import numpy as np
from numpy import dual


def order_fn(ind_sent_list):

    # The chunk index and its list of sentences arrive packed as one tuple
    index = ind_sent_list[0]
    sent_list = ind_sent_list[1]

    # Shared state is attached to the function object by the caller
    env_matrix = order_fn.env_matrix
    lmbda = order_fn.lmbda
    temp_dir = order_fn.temp_dir
    left_permutation = order_fn.left_permutation
    right_permutation = order_fn.right_permutation
    placeholder = order_fn.placeholder


    # Very slow in SciPy v. 0.7.2. Same for dok_matrix.
    # mem_matrix = lil_matrix(env_matrix.shape)

    # Occupies too much memory
    # mem_matrix = np.zeros(env_matrix.shape, dtype=np.float32)

    # So, using dictionary as temporary sparse matrix
    mem_matrix = dict()


    print 'Training on chunk of sentences', index

    for sent in sent_list:

        for k in xrange(1, lmbda):
            
            for i,word in enumerate(sent):
                
                # Window boundaries around position i, clipped to the sentence
                left = max(i - k, 0)
                right = min(i + k, len(sent))
                
                vector_list = ([env_matrix[w] for w in sent[left:i]]
                               + [placeholder]
                               + [env_matrix[w] for w in sent[i+1:right]])


                def f(vector_list):
                    # Recursively bind the context vectors into one order
                    # vector via circular convolution, computed in the FFT domain

                    if len(vector_list) == 0:
                        return np.zeros(env_matrix.shape[1])

                    elif len(vector_list) == 1:
                        return vector_list[0]

                    else:
                        v1 = dual.fft(left_permutation(f(vector_list[:-1])))
                        v2 = dual.fft(right_permutation(vector_list[-1]))
                        return dual.ifft(v1 * v2)


                order_vector = f(vector_list)


                if word not in mem_matrix:
                    mem_matrix[word] = np.zeros(env_matrix.shape[1], dtype=np.float32)

                mem_matrix[word] += order_vector



    print 'Chunk of sentences', index
    # print mem_matrix
                    
    tmp_file =\
        os.path.join(temp_dir, 'order-' + str(index) + '.tmp.npy')

    print 'Dumping to temp file\n'\
          '  ', tmp_file
    
    dump_matrix(mem_matrix, tmp_file)
    
    return tmp_file
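The nested f above builds the order vector by recursively binding the windowed context through circular convolution evaluated in the Fourier domain, with left_permutation and right_permutation making the binding direction-sensitive. A small illustration of that identity, with hypothetical fixed random shuffles of the components standing in for the permutations attached to order_fn:

# Illustration only: the dimension, permutations, and vectors are made up.
import numpy as np

d = 8
rng = np.random.RandomState(0)

# Stand-ins for order_fn.left_permutation / order_fn.right_permutation:
# fixed random shuffles of the components, one per binding direction.
p1, p2 = rng.permutation(d), rng.permutation(d)

def left_permutation(v):
    return v[p1]

def right_permutation(v):
    return v[p2]

a, b = rng.rand(d), rng.rand(d)

# What f computes for two vectors: pointwise multiplication in the FFT
# domain, i.e. circular convolution of the permuted vectors.
bound = np.fft.ifft(np.fft.fft(left_permutation(a)) * np.fft.fft(right_permutation(b)))

# The same result by the direct circular-convolution formula.
x, y = left_permutation(a), right_permutation(b)
direct = np.array([sum(x[j] * y[(i - j) % d] for j in range(d)) for i in range(d)])

assert np.allclose(np.real(bound), direct)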
Example #3
    def dump_matrix(self, filename):

        # Delegates to the module-level dump_matrix helper of the same name
        dump_matrix(self.matrix, filename)
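All three examples call a module-level dump_matrix helper that is not shown on this page. A minimal stand-in, assuming it simply serializes the object (the dict used as a temporary sparse matrix, or a dense array) into the .tmp.npy file with numpy:

# Hypothetical stand-in for the module-level dump_matrix used above.
import numpy as np

def dump_matrix(matrix, filename):
    # np.save pickles non-array objects such as the word -> vector dict
    # into a .npy file alongside ordinary dense arrays.
    np.save(filename, matrix)

def load_matrix(filename):
    # allow_pickle is required to read the pickled dict back out.
    return np.load(filename, allow_pickle=True).item()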