Example #1
    def train(self,
              corpus,
              token_type='sentences',
              stoplist=list(),
              n_columns=None,
              env_matrix=None,
              ctx_matrix=None,
              ord_matrix=None):
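        # Build a composite BEAGLE model: train (or reuse) an Environment
        # model, use its matrix to train Context and Order models, and store
        # the sum of their matrices in self.matrix. Precomputed matrices can
        # be passed in to skip the corresponding training steps.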


        if ctx_matrix is None or ord_matrix is None:

            # Remember whether an environment matrix was passed in, so we can
            # warn below if a precomputed context or order matrix may have
            # been trained with a different environment model.
            _env_matrix = env_matrix

            if env_matrix is None:

                env_model = BeagleEnvironment()
                env_model.train(corpus,
                                token_type=token_type,
                                stoplist=stoplist,
                                n_columns=n_columns)

                env_matrix = env_model.matrix


            if ctx_matrix is None:
                ctx_model = BeagleContext()
                ctx_model.train(corpus,
                                token_type=token_type,
                                stoplist=stoplist,
                                env_matrix=env_matrix)

                ctx_matrix = ctx_model.matrix

            elif _env_matrix is None:
                print 'Warning: Context and Order models '\
                      'trained with different Environment models.'


            if ord_matrix is None:

                ord_model = BeagleOrder()
                ord_model.train(corpus,
                                token_type=token_type,
                                stoplist=stoplist,
                                env_matrix=env_matrix)

                ord_matrix = ord_model.matrix

            elif _env_matrix is None:
                print 'Warning: Context and Order models '\
                      'trained with different Environment models.'

        
        # Combine the context and order matrices without modifying the
        # caller's ctx_matrix in place.
        self.matrix = ctx_matrix + ord_matrix
Example #2
def test_BeagleComposite_2():

    from inphosemantics import load_picklez, dump_matrix

    root = 'test-data/iep/plato/'

    corpus_filename =\
        root + 'corpus/iep-plato.pickle.bz2'

    env_filename =\
        root + 'models/iep-plato-beagleenviroment-sentences.npy'

    matrix_filename =\
        root + 'models/iep-plato-beaglecomposite-sentences.npy'


    print 'Loading corpus\n'\
          '  ', corpus_filename
    c = load_picklez(corpus_filename)
    

    print 'Loading environment model\n'\
          '  ', env_filename
    e = BeagleEnvironment()
    e.load_matrix(env_filename)
    print e.matrix

    print 'Training model'
    m = BeagleComposite()
    m.train(c, env_matrix=e.matrix)
    print m.matrix


    print 'Dumping matrix to\n'\
          '  ', matrix_filename
    m.dump_matrix(matrix_filename)
    
    return m
Example #3
    def train(self,
              corpus,
              token_type='sentences',
              stoplist=None,
              n_columns=None,
              env_matrix=None,
              placeholder=None,
              right_permutation=None,
              left_permutation=None,
              lmbda=7):
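        # Train a BEAGLE Order model: build (or reuse) an Environment model,
        # configure order_fn (environment matrix, placeholder vector, random
        # permutations, lambda), fan sentence chunks out to a pool of worker
        # processes (map), and sum the partial results into self.matrix
        # (reduce).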


        if env_matrix is None:
            env_model = BeagleEnvironment()
            env_model.train(corpus,
                            token_type,
                            stoplist,
                            n_columns)
        else:
            env_model = BeagleEnvironment(env_matrix)

        __shape = env_model.matrix.shape

        order_fn.env_matrix = env_model.matrix

        del env_model
        del env_matrix


        
        temp_dir = tempfile.mkdtemp()
        order_fn.temp_dir = temp_dir
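        # Each worker writes its partial result matrix into temp_dir and
        # returns the path; see the reduce step below.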


        order_fn.lmbda = lmbda


        if placeholder is None:

            # Build a random placeholder vector with entries in [-1, 1) and
            # normalize it to unit length.
            placeholder = np.random.random(__shape[1])
            placeholder *= 2
            placeholder -= 1
            placeholder /= np.sum(placeholder**2)**(1./2)

        order_fn.placeholder = placeholder
                
        print 'Placeholder:', order_fn.placeholder
        print 'Norm of placeholder', np.sum(order_fn.placeholder**2)**(1./2)



        if right_permutation is None or left_permutation is None:
            permutations = RandomPermutations(__shape[1], 2)

        if right_permutation is not None:
            order_fn.right_permutation = right_permutation
        else:
            order_fn.right_permutation = permutations.permutations[0]

        if left_permutation is not None:
            order_fn.left_permutation = left_permutation
        else:
            order_fn.left_permutation = permutations.permutations[1]

        print 'Right permutation', order_fn.right_permutation(np.arange(__shape[1]))

        print 'Left permutation', order_fn.left_permutation(np.arange(__shape[1]))




        sentences = corpus.view_tokens(token_type)
        
        # number of sentences in a chunk of sentences
        n = 500

        sent_lists = np.split(np.asarray(sentences, dtype=np.object_),
                              np.arange(n, len(sentences), n))

        ind_sent_lists = list(enumerate(sent_lists))
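        # Each worker receives an (index, chunk of sentences) pair.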



        # Map
        p = mp.Pool()
        results = p.map(order_fn, ind_sent_lists, 1)
        p.close()



        del order_fn.env_matrix


        # Reduce
        self.matrix = np.zeros(__shape, dtype=np.float32)
        
        for result in results:

            print 'Reducing', result

            summand = load_matrix(result)

            for i,row in summand.iteritems():
                self.matrix[i,:] += row

            # self.matrix += summand


        # Clean up
        print 'Deleting temporary directory\n'\
              '  ', temp_dir

        shutil.rmtree(temp_dir)
Example #4
    def train(self,
              corpus,
              token_type='sentences',
              stoplist=list(),
              n_columns=None,
              env_matrix=None):
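        # Train a BEAGLE Context model: build (or reuse) an Environment model,
        # apply the stoplist to its matrix, fan sentence chunks out to a pool
        # of worker processes (map), and sum the partial results into
        # self.matrix (reduce).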


        if env_matrix is None:
            env_model = BeagleEnvironment()
            env_model.train(corpus,
                            token_type,
                            stoplist,
                            n_columns)
        else:
            env_model = BeagleEnvironment(env_matrix)

        # Apply stoplist to environment matrix
        env_model.filter_rows(stoplist)


        __shape = env_model.matrix.shape


        context_fn.env_matrix = env_model.matrix

        del env_model
        del env_matrix


        
        temp_dir = tempfile.mkdtemp()
        context_fn.temp_dir = temp_dir


        sentences = corpus.view_tokens(token_type)
        
        # number of sentences in a chunk of sentences
        n = 500

        sent_lists = np.split(np.asarray(sentences, dtype=np.object_),
                              np.arange(n, len(sentences), n))

        ind_sent_lists = list(enumerate(sent_lists))


        # Map
        p = mp.Pool()
        results = p.map(context_fn, ind_sent_lists, 1)
        p.close()


        del context_fn.env_matrix


        # Reduce
        self.matrix = np.zeros(__shape, dtype=np.float32)
        
        for result in results:

            print 'Reducing', result

            summand = load_matrix(result)
            # self.matrix += summand

            for i,row in summand.iteritems():
                self.matrix[i,:] += row

        # Clean up
        print 'Deleting temporary directory\n'\
              '  ', temp_dir

        shutil.rmtree(temp_dir)
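
A minimal end-to-end sketch of how the pieces above might be combined. It reuses the corpus path from Example #2, assumes BeagleEnvironment.train accepts the same arguments it is shown receiving positionally in Examples #3 and #4, and is an illustration of the call order, not code taken from the library:

from inphosemantics import load_picklez

# Corpus path reused from Example #2.
c = load_picklez('test-data/iep/plato/corpus/iep-plato.pickle.bz2')

# 1. Environment model.
env = BeagleEnvironment()
env.train(c, token_type='sentences')

# 2. Context and Order models, both built on the same environment matrix.
ctx = BeagleContext()
ctx.train(c, env_matrix=env.matrix)

order = BeagleOrder()
order.train(c, env_matrix=env.matrix)

# 3. Composite model: the sum of the context and order matrices.
comp = BeagleComposite()
comp.train(c, ctx_matrix=ctx.matrix, ord_matrix=order.matrix)
print comp.matrix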