Example #1
File: fm.py Project: misads/ctr
    def train(self, train_data, dev_data=None):
        # Assumes module-level imports: os, time, numpy as np, and the
        # project's utils helper module.
        hparams = self.hparams
        sess = self.sess
        assert len(train_data[0]) == len(train_data[1]), \
            "Size of feature data must equal size of label data"
        for epoch in range(hparams.epoch):
            info = {'loss': [], 'norm': []}
            start_time = time.time()
            # The "+ 3" pads the range; the breaks below end the epoch early.
            for idx in range(len(train_data[0]) // hparams.batch_size + 3):
                # Stop early if an optional per-epoch step budget is set
                # (replaces the original bare try/except around hparams.steps).
                if getattr(hparams, 'steps', None) is not None and hparams.steps <= idx:
                    T = time.time() - start_time
                    self.eval(T, dev_data, hparams, sess)
                    break
                # Stop once the training set is exhausted.
                if idx * hparams.batch_size >= len(train_data[0]):
                    T = time.time() - start_time
                    self.eval(T, dev_data, hparams, sess)
                    break

                batch = train_data[0][idx * hparams.batch_size:
                                      min((idx + 1) * hparams.batch_size, len(train_data[0]))]
                batch = utils.hash_batch(batch, hparams)
                label = train_data[1][idx * hparams.batch_size:
                                      min((idx + 1) * hparams.batch_size, len(train_data[1]))]
                loss, _, norm = sess.run([self.loss, self.update, self.grad_norm],
                                         feed_dict={self.features: batch, self.label: label})
                info['loss'].append(loss)
                info['norm'].append(norm)
                if (idx + 1) % hparams.num_display_steps == 0:
                    info['learning_rate'] = hparams.learning_rate
                    info['train_ppl'] = np.mean(info['loss'])
                    info['avg_grad_norm'] = np.mean(info['norm'])
                    utils.print_step_info("  ", epoch, idx + 1, info)
                    # Reset the running statistics for the next window.
                    info = {'loss': [], 'norm': []}
                if (idx + 1) % hparams.num_eval_steps == 0 and dev_data:
                    T = time.time() - start_time
                    self.eval(T, dev_data, hparams, sess)

        # Restore the checkpoint from model_tmp/, run a final evaluation,
        # then remove the temporary checkpoint directory.
        self.saver.restore(sess, 'model_tmp/model')
        T = time.time() - start_time
        self.eval(T, dev_data, hparams, sess)
        os.system("rm -r model_tmp")
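
For context, a minimal sketch of how this train method might be driven. Every hparams field name below is inferred from the loop above; the SimpleNamespace container and the FM(...) constructor are hypothetical stand-ins, not the project's actual API.

from types import SimpleNamespace

# Hypothetical driver for Example #1. Field names are inferred from the
# method body; the real project (misads/ctr) builds hparams elsewhere.
hparams = SimpleNamespace(
    epoch=2,                 # passes over the training data
    batch_size=256,          # rows fed per sess.run step
    steps=None,              # optional cap on steps per epoch
    learning_rate=0.001,
    num_display_steps=100,   # log running loss/grad-norm this often
    num_eval_steps=500,      # evaluate on dev_data this often
)

# train_data and dev_data are (features, labels) pairs:
# model = FM(hparams)                              # hypothetical constructor
# model.train((X_train, y_train), (X_dev, y_dev))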
Example #2
    def train(self, train, dev):
        hparams = self.hparams
        sess = self.sess
        # Pull each configured feature group out of the training DataFrame.
        train_single_features = train[hparams.single_features].values
        train_label = train[hparams.label].values
        if hparams.multi_features is not None:
            train_multi_features = train[hparams.multi_features].values
            train_multi_weights = train[hparams.multi_weights].values
        if hparams.dense_features is not None:
            train_dense_features = train[hparams.dense_features].values
        if hparams.kv_features is not None:
            train_kv_features = train[hparams.kv_features].values
        if hparams.cross_features is not None:
            train_cross_features = train[hparams.cross_features].values
        for epoch in range(hparams.epoch):
            info = {'loss': [], 'norm': []}
            start_time = time.time()
            # The "+ 3" pads the range; the break below ends the epoch early.
            for idx in range(len(train) // hparams.batch_size + 3):
                if idx * hparams.batch_size >= len(train):
                    # End of epoch: log the running stats, then evaluate.
                    T = time.time() - start_time
                    info['learning_rate'] = hparams.learning_rate
                    info['train_ppl'] = np.mean(info['loss'])
                    info['avg_grad_norm'] = np.mean(info['norm'])
                    utils.print_step_info("  ", epoch, idx + 1, info)
                    if dev is not None:
                        self.eval(T, dev, hparams, sess)
                    break
                # Slice the current mini-batch out of each feature group.
                feed_dic = {}
                lo = idx * hparams.batch_size
                hi = min((idx + 1) * hparams.batch_size, len(train))
                single_batch = utils.hash_single_batch(train_single_features[lo:hi], hparams)
                feed_dic[self.single_features] = single_batch

                if hparams.multi_features is not None:
                    multi_batch = train_multi_features[lo:hi]
                    multi_weight = train_multi_weights[lo:hi]
                    multi_batch, multi_weights = utils.hash_multi_batch(multi_batch, multi_weight, hparams)
                    feed_dic[self.multi_features] = multi_batch
                    feed_dic[self.multi_weights] = multi_weights
                if hparams.dense_features is not None:
                    feed_dic[self.dense_features] = train_dense_features[lo:hi]
                if hparams.kv_features is not None:
                    feed_dic[self.kv_features] = train_kv_features[lo:hi]
                if hparams.cross_features is not None:
                    cross_batch = utils.hash_single_batch(train_cross_features[lo:hi], hparams)
                    feed_dic[self.cross_features] = cross_batch
                feed_dic[self.label] = train_label[lo:hi]
                feed_dic[self.use_norm] = True
                loss, _, norm = sess.run([self.loss, self.update, self.grad_norm], feed_dict=feed_dic)

                info['loss'].append(loss)
                info['norm'].append(norm)
                if (idx + 1) % hparams.num_display_steps == 0:
                    info['learning_rate'] = hparams.learning_rate
                    info['train_ppl'] = np.mean(info['loss'])
                    info['avg_grad_norm'] = np.mean(info['norm'])
                    utils.print_step_info("  ", epoch, idx + 1, info)
                    # Reset the running statistics for the next window.
                    info = {'loss': [], 'norm': []}

        return self.best_score
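
As a rough sketch of the expected input, the method above reads feature groups out of a pandas DataFrame by column name. The column names below are hypothetical, chosen only to illustrate the layout; feature groups that are not used are set to None so their branches in train() are skipped.

import pandas as pd

# Hypothetical training frame; column names are illustrative only.
train_df = pd.DataFrame({
    'user_id': [1, 2, 3],           # single (categorical) feature
    'item_id': [10, 20, 30],        # single (categorical) feature
    'ctr_7d': [0.10, 0.05, 0.20],   # dense (numeric) feature
    'label': [0, 1, 0],
})

# Assumed hparams wiring for this frame:
# hparams.single_features = ['user_id', 'item_id']
# hparams.dense_features  = ['ctr_7d']
# hparams.multi_features  = None   # skips the multi-feature/weights branch
# hparams.kv_features     = None
# hparams.cross_features  = None
# hparams.label           = 'label'

# model.train(train_df, dev_df)    # returns self.best_score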