import os
import random
import time

import tensorflow as tf
from sklearn import metrics


def train(self):
    print('loading data...')
    train_data_loader = DataUtils(self.config, is_train=True)
    self.config.vocab_size = train_data_loader.vocab_size
    self.config.num_batches = train_data_loader.num_batches
    print('begin train...')
    if self.config.init_from is not None:
        assert os.path.isdir(self.config.init_from), '{} must be a directory'.format(self.config.init_from)
        self.ckpt = tf.train.get_checkpoint_state(self.config.init_from)
        assert self.ckpt, 'No checkpoint found'
        assert self.ckpt.model_checkpoint_path, 'No model path found in checkpoint'
    model = SiameseLstm(self.config, is_training=True)
    tf.summary.scalar('train_loss', model.cost)
    merged = tf.summary.merge_all()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(self.config.log_dir, sess.graph)
        sess.run(tf.global_variables_initializer())
        # restore model weights when resuming from a checkpoint
        if self.config.init_from is not None:
            model.saver.restore(sess, self.ckpt.model_checkpoint_path)
        for e in range(self.config.num_epochs):
            train_data_loader.reset_batch()
            b = 0
            while not train_data_loader.eos:
                b += 1
                start = time.time()
                x1_batch, x2_batch, y_batch = train_data_loader.next_batch()
                # randomly swap x1_batch and x2_batch so each tower
                # sees both sides of a pair
                if random.random() > 0.5:
                    feed = {model.input_x1: x1_batch,
                            model.input_x2: x2_batch,
                            model.y_data: y_batch}
                else:
                    feed = {model.input_x1: x2_batch,
                            model.input_x2: x1_batch,
                            model.y_data: y_batch}
                fetches = [model.cost, merged, model.train_op]
                train_loss, summary, _ = sess.run(fetches=fetches, feed_dict=feed)
                end = time.time()
                print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format(
                    e * self.config.num_batches + b,
                    self.config.num_epochs * self.config.num_batches,
                    e, train_loss, end - start))
                if (e * self.config.num_batches + b) % 500 == 0:
                    checkpoint_path = os.path.join(self.config.save_dir, 'model.ckpt')
                    model.saver.save(sess, checkpoint_path,
                                     global_step=e * self.config.num_batches + b)
                    print('model saved to {}'.format(checkpoint_path))
                if b % 20 == 0:
                    train_writer.add_summary(summary, e * self.config.num_batches + b)
def predict(self):
    print('loading data...')
    test_data_loader = DataUtils(self.config, is_train=False)
    self.config.vocab_size = test_data_loader.vocab_size
    print('creating model...')
    model = SiameseLstm(self.config, is_training=False)
    print('begin predict...')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # restore model
        print('restore model...')
        model_checkpoint_path = os.path.join(self.config.save_dir, 'model.ckpt-500')
        model.saver.restore(sess, model_checkpoint_path)
        print('predict...')
        feed = {
            model.input_x1: test_data_loader.x1,
            model.input_x2: test_data_loader.x2,
            model.y_data: test_data_loader.y
        }
        fetches = [model.logits_1, model.logits_2, model.Ew]
        result = sess.run(fetches=fetches, feed_dict=feed)
        # hard 0/1 predictions from thresholding the similarity Ew at 0.5;
        # note that AUC is computed here over the thresholded labels, so
        # passing the raw scores result[2] would preserve more ranking
        # information
        results = [0 if i < 0.5 else 1 for i in result[2]]
        auc = metrics.roc_auc_score(test_data_loader.y, results)
        print('auc:', auc)
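# Usage sketch for the train()/predict() pair above. The enclosing class is
# not shown in this snippet, so the name "SiameseRunner" below is hypothetical
# and stands for whatever class defines these two methods:
#
#     runner = SiameseRunner()
#     runner.train()    # writes a checkpoint to config.save_dir every 500 steps
#     runner.predict()  # restores 'model.ckpt-500' and reports AUC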
import os

import tensorflow as tf


class ModelTrain(object):
    def __init__(self):
        self.utils = DataUtils()
        self.config = Config()

    def train_step(self):
        print("loading the dataset...")
        mnist = self.utils.load_data(config=self.config)
        print("begin training")
        with tf.Graph().as_default(), tf.Session() as session:
            initializer = tf.random_uniform_initializer(-1 * self.config.init_scale,
                                                        1 * self.config.init_scale)
            with tf.variable_scope('model', reuse=None, initializer=initializer):
                softmodel = SoftModel(config=self.config, is_training=True)
            # add summary (fixed: the original called os.pardir.join)
            train_summary_dir = os.path.join(self.config.out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)
            # add checkpoint
            checkpoint_dir = os.path.abspath(os.path.join(self.config.out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            tf.global_variables_initializer().run()
            global_steps = 1
            for i in range(self.config.num_epoch):
                print("the %d epoch training..." % (i + 1))
                global_steps = self.run_epoch()
                if i % self.config.checkpoint_steps == 0:
                    path = softmodel.saver.save(session, checkpoint_prefix, global_steps)
                    print("Saved model checkpoint to {}\n".format(path))
            # the original referenced undefined test_model/test_data here;
            # reusing softmodel and mnist is an assumption about the intent
            test_accuracy = self.evaluate(softmodel, session, mnist)
            print("the test data accuracy is %f" % test_accuracy)
            print("program end!")

    def run_epoch(self):
        # placeholder: a real implementation would iterate over the
        # training batches and return the updated global step
        global_steps = 1
        return global_steps

    def evaluate(self, test_model, session, test_data):
        # placeholder: a real implementation would run the model's
        # accuracy op over test_data
        acc = float(0)
        return acc
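# run_epoch above is a stub that always returns 1. A minimal sketch of a real
# epoch loop, assuming SoftModel exposes input_data/target placeholders and
# cost/train_op ops -- those attribute names are assumptions, since SoftModel
# is not shown in this snippet:
def run_epoch_sketch(model, session, batches, global_steps):
    # one parameter update per (x, y) mini-batch
    for x_batch, y_batch in batches:
        feed = {model.input_data: x_batch, model.target: y_batch}
        _, loss = session.run([model.train_op, model.cost], feed_dict=feed)
        global_steps += 1
    return global_steps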
class ModelPredict(object):
    def __init__(self):
        self.utils = DataUtils()
        self.config = Config()

    def predict(self):
        print("loading the dataset...")
        mnist = self.utils.load_data(config=self.config)
        print("begin predict...")
        with tf.Graph().as_default(), tf.Session() as session:
            initializer = tf.random_uniform_initializer(-1 * self.config.init_scale,
                                                        1 * self.config.init_scale)
            with tf.variable_scope('model', reuse=None, initializer=initializer):
                softmodel = SoftModel(config=self.config, is_training=False)
            # restore the latest checkpoint and run the prediction op
            ckpt = tf.train.get_checkpoint_state(self.config.model_dir)
            softmodel.saver.restore(session, ckpt.model_checkpoint_path)
            res = session.run(softmodel.pred,
                              feed_dict={softmodel.input_data: mnist[0],
                                         softmodel.target: mnist[1]})
            return res
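# Follow-up sketch: turning res into an accuracy number. Whether
# softmodel.pred yields per-class scores and whether mnist[1] holds one-hot
# labels are both assumptions, since SoftModel and DataUtils are not shown:
#
#     import numpy as np
#     accuracy = np.mean(np.argmax(res, axis=1) == np.argmax(mnist[1], axis=1))
#     print("accuracy: %f" % accuracy)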
def predict(self):
    print('loading data...')
    test_data_loader = DataUtils(self.config, is_train=False)
    self.config.vocab_size = test_data_loader.vocab_size
    print('creating model...')
    model = SimSiamese(self.config, is_training=False)
    print('begin predict...')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # restore model
        print('restore model...')
        model_checkpoint_path = os.path.join(self.config.save_dir, 'model.ckpt-500')
        model.saver.restore(sess, model_checkpoint_path)
        print('predict...')
        feed = {
            model.input_x1: test_data_loader.x1,
            model.input_x2: test_data_loader.x2,
            model.y_data: test_data_loader.y
        }
        ins = tf.nn.sigmoid(model.logits)
        # sigmoid is monotonic, so the argmax over logits and over ins
        # select the same class; both are fetched here for inspection
        fetches = [
            ins,
            tf.argmax(model.logits, axis=1),
            tf.argmax(ins, axis=1)
        ]
        inss, result, res = sess.run(fetches=fetches, feed_dict=feed)
        print('result:', [i for i in result if i > 0])
        print('res:', res)
        print('ins:', inss)
        auc = metrics.roc_auc_score(test_data_loader.y, result)
        print('auc:', auc)
from unittest import TestCase

from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = None
Session = None


class MyTestBase(TestCase):
    """
    This should be the basis of all tests, with further overrides provided
    by child classes. This should keep some basic functionality in mind and
    make test writing efficient.
    """

    @classmethod
    def setUpClass(cls):
        global engine
        global Session
        dbport = '5432'
        dbname = '{{cookiecutter.testdbname}}'
        # NOTE: the user@host portion of this URI was mangled by e-mail
        # obfuscation in the source; 'postgres@localhost' is an assumption
        uri = 'postgresql+psycopg2://postgres@localhost:%s/%s' % (dbport, dbname)
        cls.db_url = uri
        if engine is None:
            engine = create_engine(uri)
            Session = scoped_session(sessionmaker(bind=engine))
        cls.factory_sessions = Session
        cls.class_session = Session()

    @classmethod
    def tearDownClass(cls):
        cls.class_session.rollback()

    def setUp(self):
        # We want a per-test session within the class-wide session,
        # like a layered burrito
        self.session = self.class_session
        self.session.begin_nested()
        self.datautils = DataUtils(self.session)

    def tearDown(self):
        self.session.rollback()

    def create_customer(self, extra_data=None):
        """Creates a customer for testing."""
        return self.datautils.create_user(extra_data)
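# A sketch of how a child class builds on MyTestBase; the extra_data fields
# and the assertion are illustrative, since DataUtils.create_user is not
# shown here:
class CustomerTest(MyTestBase):
    def test_create_customer(self):
        customer = self.create_customer(extra_data={'name': 'test'})
        self.assertIsNotNone(customer)
        # whatever this test writes lives inside the nested transaction
        # opened in setUp, and tearDown rolls it back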
import os

import numpy as np
import tensorflow as tf
from sklearn import metrics


class Train(object):
    def __init__(self):
        self.utils = DataUtils()
        self.mlp = MlpModel()
        self.config = Config()

    def run(self):
        # load data
        labels, features = self.utils.load_train_data(
            filename='/Users/yingjie10/deeptext/data/semanticmatch/data_200pos.txt')
        kDataTrain, kDataTrainC, kDataTest, kDataTestC = self.utils.kfold(features, labels)
        # create the model save directory (the save path below nests
        # model_name under model_dir, so create the full path)
        model_dir = "mlpmodel"
        model_name = "match"
        self.mkdir(os.path.join(model_dir, model_name))
        acc = []
        for index in range(len(kDataTrain)):
            # reset the graph so each fold builds (and checkpoints) a fresh
            # set of variables; otherwise variable names shift between folds
            # and restoring a single fold's checkpoint fails
            tf.reset_default_graph()
            # prepare the data for this fold
            print("cross validation:", index)
            ty, tx = kDataTrainC[index], kDataTrain[index]
            testy, testx = kDataTestC[index], kDataTest[index]
            ty = self.utils.dense_to_one_hot(ty)
            testy = self.utils.dense_to_one_hot(testy)
            # feature dimensionality
            n_input = tx.shape[1]
            # tf Graph input
            x = tf.placeholder("float", [None, n_input])
            y = tf.placeholder("float", [None, self.config.n_class])
            # Store layers' weights & biases
            weights = {
                'h1': tf.Variable(tf.random_normal([n_input, self.config.n_hidden_1])),
                'h2': tf.Variable(tf.random_normal([self.config.n_hidden_1, self.config.n_hidden_2])),
                'out': tf.Variable(tf.random_normal([self.config.n_hidden_2, self.config.n_class]))
            }
            biases = {
                'b1': tf.Variable(tf.random_normal([self.config.n_hidden_1])),
                'b2': tf.Variable(tf.random_normal([self.config.n_hidden_2])),
                'out': tf.Variable(tf.random_normal([self.config.n_class]))
            }
            # Construct model
            pred = self.mlp.create_model(x, weights, biases)
            # Define loss and optimizer; the original cross-entropy had pred
            # and y swapped (-sum(pred * log(y))), which is incorrect
            cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.config.learning_rate).minimize(cost)
            # saver for checkpointing the model
            saver = tf.train.Saver()
            # Initializing the variables
            init = tf.global_variables_initializer()
            with tf.Session() as sess:
                sess.run(init)
                for epoch in range(self.config.training_epochs):
                    avg_cost = 0
                    total_batch = int(tx.shape[0] / self.config.batch_size)
                    # note: this start/end pairing drops the final partial batch
                    for start, end in zip(
                            range(0, len(tx), self.config.batch_size),
                            range(self.config.batch_size, len(tx), self.config.batch_size)):
                        _, loss = sess.run([optimizer, cost],
                                           feed_dict={x: tx[start:end], y: ty[start:end]})
                        avg_cost += loss / total_batch
                    if epoch % self.config.display_step == 0:
                        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
                print("Optimization Finished!")
                correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
                result = accuracy.eval({x: testx, y: testy})
                print("Accuracy:", result)
                acc.append(result)
                # save the model for this fold
                saver.save(sess, os.path.join(model_dir, model_name, str(index)))
                print("Model saved successfully!")
        print("cross validation result")
        print("accuracy:", np.mean(acc))
        print("Training finished!")

    def mkdir(self, model_dir):
        """Create the directory if it does not exist."""
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
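# MlpModel.create_model is not shown in this file. Given the weights and
# biases dictionaries above (h1/h2/out, b1/b2/out), a minimal sketch of a
# matching two-hidden-layer MLP that returns raw logits would be:
def create_model_sketch(x, weights, biases):
    # hidden layer 1: affine transform + ReLU
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    # hidden layer 2: affine transform + ReLU
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    # output layer: raw logits; softmax is applied by the loss during
    # training and explicitly at predict time
    return tf.add(tf.matmul(layer_2, weights['out']), biases['out'])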
class Predict(object):
    def __init__(self):
        self.utils = DataUtils()
        self.mlp = MlpModel()
        self.config = Config()

    def run(self):
        # load data
        label, feature = self.utils.load_predict_data(
            filename='/Users/yingjie10/deeptext/data/semanticmatch/data_200pos.txt')
        labels, features = np.asarray(label), np.asarray(feature)
        # model save directory and the fold index to restore
        model_dir = "mlpmodel"
        model_name = "match"
        index = 3
        # feature dimensionality
        n_input = features.shape[1]
        # tf Graph input
        x = tf.placeholder("float", [None, n_input])
        # Store layers' weights & biases (must match the training graph)
        weights = {
            'h1': tf.Variable(tf.random_normal([n_input, self.config.n_hidden_1])),
            'h2': tf.Variable(tf.random_normal([self.config.n_hidden_1, self.config.n_hidden_2])),
            'out': tf.Variable(tf.random_normal([self.config.n_hidden_2, self.config.n_class]))
        }
        biases = {
            'b1': tf.Variable(tf.random_normal([self.config.n_hidden_1])),
            'b2': tf.Variable(tf.random_normal([self.config.n_hidden_2])),
            'out': tf.Variable(tf.random_normal([self.config.n_class]))
        }
        # Construct model
        pred = self.mlp.create_model(x, weights, biases)
        # saver for restoring the checkpointed model
        saver = tf.train.Saver()
        # prediction op: softmax over the logits
        y = tf.nn.softmax(pred)
        with tf.Session() as sess:
            # restore the model saved for this fold
            saver.restore(sess, os.path.join(model_dir, model_name, str(index)))
            # run prediction
            result = sess.run(y, feed_dict={x: features})
            results = [res.argmax() for res in result]
            print('result:', results)
            auc = metrics.roc_auc_score(label, results)
            print('auc:', auc)
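# A minimal driver for the Train/Predict pair above (assumes the data file
# path hard-coded in run() exists on the local machine):
if __name__ == '__main__':
    Train().run()    # k-fold training, one checkpoint per fold
    Predict().run()  # restores fold 3 and reports predictions + AUC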