def filled_db():
    imhere.app.secret_key = str(uuid.uuid4())
    um = users_model.Users()
    um.get_or_create_user(stu)
    um.get_or_create_user(newt)

    m = model.Model()
    ds = m.get_client()

    key = ds.key('student')
    entity = datastore.Entity(key=key)
    entity.update({
        'sid': stu['id'],
        'uni': 'cs4156'
    })
    ds.put(entity)

    key = ds.key('teacher')
    entity = datastore.Entity(key=key)
    entity.update({
        'tid': newt['id']
    })
    ds.put(entity)

    tm = teachers_model.Teachers(newt['id'])
    course_name = 'Writing'
    cid = tm.add_course(course_name)
    cm = courses_model.Courses(cid)
    cm.add_student('cs4156')
    yield cid
def post(self):
    print(self.request)
    draw = self.json_args['draw']
    start = self.json_args['start']
    length = self.json_args['length']
    order = self.json_args['order'][0]['dir']
    ind_column = self.json_args['order'][0]['column']
    col_name = self.json_args['columns'][ind_column]['data']

    search_data = {}
    search_data['from'] = self.json_args['from']
    search_data['to'] = self.json_args['to']
    search_data['day'] = self.json_args['day']
    # Require all three filters, otherwise search without them.
    # list(...) is needed because filter() returns an iterator in Python 3,
    # which has no len(); the original len(filter(...)) raises TypeError.
    if len(list(filter(lambda x: x is not None, search_data.values()))) < 3:
        search_data = None

    flights, total_num, filtered = yield tm.list_flights(
        search_data, col_name, start, length, order)
    response = {
        'draw': draw,
        'recordsTotal': total_num,
        'recordsFiltered': filtered,
        'data': flights
    }
    # response = json.dumps(response, default=utils.json_serial)
    self.set_header('Content-Type', 'text/javascript;charset=utf-8')
    self.write(model.Model(response).json())
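For reference, a sketch of the JSON body this handler parses. The draw/start/length/order/columns fields follow the DataTables server-side protocol; from/to/day are this app's own filters, and the concrete values below are invented:

# Illustrative request payload (values are assumptions, not from the source):
example_json_args = {
    'draw': 1,
    'start': 0,
    'length': 10,
    'order': [{'column': 0, 'dir': 'asc'}],
    'columns': [{'data': 'from'}, {'data': 'to'}, {'data': 'day'}],
    'from': 'JFK',
    'to': 'LAX',
    'day': 'Monday',
}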
def main():
    """Main function"""
    args = parse_args()
    smiles_list = uc.read_smi_file(args.input_smiles_path)

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(smiles_list, tokenizer=tokenizer)
    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)

    network_params = {
        'num_layers': args.num_layers,
        'layer_size': args.layer_size,
        'cell_type': args.cell_type,
        'embedding_layer_size': args.embedding_layer_size,
        'dropout': args.dropout
    }
    model = mm.Model(no_cuda=True, vocabulary=vocabulary, tokenizer=tokenizer,
                     network_params=network_params,
                     max_sequence_length=args.max_sequence_length)
    LOG.info("Saving model at %s", args.output_model_path)
    model.save(args.output_model_path)
def testAverageGradients(self):
    """
    Checks the correct average for multiple towers and multiple variables.

    The test model has 2 towers with 2 variables shared between them.
    var_0 is getting 1.0 + 3.0 as gradient -> average: 2.0
    var_1 is getting 2.0 + 4.0 as gradient -> average: 3.0
    """
    with tf.Graph().as_default():
        with tf.Session() as session:
            test_model = model.Model(self.hparams)
            grad_0 = tf.constant(1.0)
            grad_1 = tf.constant(2.0)
            tower_0 = [(grad_0, 'var_0'), (grad_1, 'var_1')]
            grad_2 = tf.constant(3.0)
            grad_3 = tf.constant(4.0)
            tower_1 = [(grad_2, 'var_0'), (grad_3, 'var_1')]
            tower_grads = [tower_0, tower_1]

            average_grads = test_model._average_gradients(tower_grads)

            self.assertEqual(len(average_grads), 2)
            self.assertEqual('var_0', average_grads[0][1])
            average_grad_0 = session.run(average_grads[0][0])
            self.assertEqual(2.0, average_grad_0)
            self.assertEqual('var_1', average_grads[1][1])
            average_grad_1 = session.run(average_grads[1][0])
            self.assertEqual(3.0, average_grad_1)
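The helper exercised by this test is not shown in this collection. Below is a minimal sketch of the classic multi-tower gradient-averaging pattern it appears to follow; the actual model.Model._average_gradients may differ:

# Sketch only: averages per-variable gradients across towers.
def _average_gradients(tower_grads):
    """tower_grads: list (one per tower) of lists of (gradient, variable)."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_var0_tower0, var0), (grad_var0_tower1, var0), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), 0)
        # The variable is shared across towers, so take it from the first one.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads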
def delete_session(self, del_seid):
    ds = model.Model().get_client()
    query = ds.query(kind='sessions')
    query.add_filter('seid', '=', int(del_seid))
    results = list(query.fetch())
    # Delete each matching entity by its own key. The original rebuilt the
    # same key from del_seid on every iteration, ignoring the fetched result.
    for result in results:
        ds.delete(result.key)
def upper_partition_course_entity(self):
    ds = model.Model().get_client()
    key = ds.key('courses')
    entity = datastore.Entity(key=key)
    entity.update({'name': "Strings123"})
    ds.put(entity)
    # The first put allocates the key id, which is then stored back as cid.
    cid = int(entity.key.id)
    entity.update({'name': "Strings123", 'cid': int(cid)})
    ds.put(entity)
    return entity
def __init__(self, config_data):
    self._id = config_data['id']
    self._models = {}
    self._settings = {}
    for id, settingConfig in config_data['settings'].items():
        self._settings[id] = typeMap.typeMap[settingConfig['type']](settingConfig)
    for id, modelConfig in config_data['models'].items():
        self._models[id] = model.Model(modelConfig)
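For context, a hypothetical shape of config_data that would satisfy this constructor; the keys inside each setting and model entry are assumptions, and the 'type' values must be keys of typeMap.typeMap:

# Hypothetical input, illustrative only:
example_config_data = {
    'id': 'station-1',
    'settings': {
        'rate': {'type': 'int', 'value': 10},
    },
    'models': {
        'main': {'id': 'main'},
    },
}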
def run(self):
    """
    Performs the creation of the model.
    """
    if self._already_run:
        return

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(self._smiles_list, tokenizer=tokenizer)
    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)
    LOG.info("Saving model at %s", self._output_model_path)

    network_params = {
        'num_layers': self._num_layers,
        'layer_size': self._layer_size,
        'embedding_layer_size': self._embedding_layer_size,
        'dropout': self._dropout,
        'memory_cells': self._memory_cells,
        'cell_size': self._cell_size,
        'read_heads': self._read_heads,
        'num_controller_layers': self._num_controller_layers,
        'controller_type': self._controller_type,
        'model_type': self._model_type
    }
    model = mm.Model(vocabulary=vocabulary, tokenizer=tokenizer,
                     network_params=network_params,
                     model_type=self._model_type,
                     max_sequence_length=self._max_sequence_length)

    # Find a free storage folder by appending "(0)", "(1)", ... on collision.
    # The original stripped a fixed 3-character suffix between attempts,
    # which broke once the counter reached two digits.
    model_folder = model.model_name.split('.')[0]
    base_path = os.path.join(self._output_model_path, model_folder)
    storage_folder_path = base_path
    i = 0
    while os.path.exists(storage_folder_path):
        storage_folder_path = '%s(%s)' % (base_path, i)
        i += 1
    os.makedirs(storage_folder_path)

    self._output_model_path = os.path.join(storage_folder_path, model.model_name)
    model.model_dir = storage_folder_path
    model.save(self._output_model_path)
    LOG.info('Model saved!')
    LOG.info(model.__dict__)
def test_print(self):
    ds = model.Model().get_client()
    query = ds.query(kind='sessions')
    # query.add_filter('date', '=', self.date)
    # query.add_filter('cid', '=', 68)
    result = list(query.fetch())
    print('\n' + 'Sessions ' + '=' * 105)
    for session in result:
        print(session)
def test_get_current_roster_size(self):
    ssm = sessions_model.Sessions()
    cid = self.upper_partition_course_entity()['cid']
    seid = ssm.open_session(cid)
    assert ssm.get_current_roster_size() == 0

    ds = model.Model().get_client()
    key = ds.key('enrolled_in')
    entity = datastore.Entity(key=key)
    entity.update({'sid': 653, 'cid': cid})
    ds.put(entity)

    assert ssm.get_current_roster_size() == 1
    self.delete_session(seid)
def testModelInitialization(self):
    """Checks the variables declared in the init method.

    Initialization should only declare global_step, as a non-trainable
    variable.
    """
    with tf.Graph().as_default():
        model.Model(self.hparams)
        trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        self.assertEqual(len(trainable_vars), 0)
        global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.assertEqual(len(global_vars), 1)
        self.assertStartsWith(global_vars[0].name, 'global_step')
def register():
    if request.method == 'GET':
        return render_template(
            'register.html',
            name=flask.session['google_user']['name'],
            is_student=flask.session['is_student'],
            is_teacher=flask.session['is_teacher']
        )
    elif request.method == 'POST':
        m = model.Model()
        ds = m.get_client()
        if request.form['type'] == 'student':
            # Check that the uni doesn't already exist;
            # if it doesn't, continue with student creation.
            um = users_model.Users()
            if not um.is_valid_uni(request.form['uni']):
                key = ds.key('student')
                entity = datastore.Entity(key=key)
                entity.update({
                    'sid': flask.session['id'],
                    'uni': request.form['uni']
                })
                ds.put(entity)
                flask.session['is_student'] = True
                return flask.redirect(flask.url_for('main_student'))
            else:
                return render_template(
                    'register.html',
                    name=flask.session['google_user']['name'],
                    invalid_uni=True)
        else:
            try:
                key = ds.key('teacher')
                entity = datastore.Entity(key=key)
                entity.update({
                    'tid': flask.session['id']
                })
                ds.put(entity)
                flask.session['is_teacher'] = True
            except Exception:
                # Swallow datastore errors and fall through to the redirect.
                # A bare `except:` here would also trap SystemExit and
                # KeyboardInterrupt, so catch Exception instead.
                pass
            return flask.redirect(flask.url_for('main_teacher'))
def run(self):
    """
    Carries out the creation of the model.
    """
    tokenizer = voc.SMILESTokenizer()
    vocabulary = voc.create_vocabulary(self._smiles_list, tokenizer=tokenizer)

    network_params = {
        'num_layers': self._num_layers,
        'layer_size': self._layer_size,
        'cell_type': self._cell_type,
        'embedding_layer_size': self._embedding_layer_size,
        'dropout': self._dropout,
        'layer_normalization': self._layer_normalization
    }
    model = reinvent.Model(no_cuda=True, vocabulary=vocabulary,
                           tokenizer=tokenizer, network_params=network_params,
                           max_sequence_length=self._max_sequence_length)
    model.save(self._output_model_path)
    return model
def lower_partition_session_entity(self):
    ds = model.Model().get_client()
    key = ds.key('sessions')
    entity = datastore.Entity(key=key)
    # NOTE: the property name 'self.secret' is stored literally as written.
    entity.update({
        'date': '',
        'cid': 99999999,
        'window_open': False,
        'self.secret': -1
    })
    ds.put(entity)
    # The first put allocates the key id, which is then stored back as seid.
    seid = int(entity.key.id)
    entity.update({
        'date': '',
        'cid': 99999999,
        'window_open': False,
        'self.secret': -1,
        'seid': int(seid)
    })
    ds.put(entity)
    return entity
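The two-step put above (put once so Datastore allocates an id, then store that id back as a property) recurs across these fixtures. A minimal hedged helper capturing the pattern; the function and parameter names are my own, not from the source:

def put_with_id_property(ds, kind, props, id_prop):
    """Create an entity of `kind`, then persist its allocated id as `id_prop`."""
    entity = datastore.Entity(key=ds.key(kind))
    entity.update(props)
    ds.put(entity)                       # first put allocates entity.key.id
    entity[id_prop] = int(entity.key.id)
    ds.put(entity)                       # second put persists the id property
    return entity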
    for i in range(1, shape[0] - 1):
        for j in range(1, shape[1] - 1):
            # Zero out a pixel when both horizontal or both vertical
            # neighbours are zero; otherwise smooth it with a weighted
            # average of its four neighbours.
            if ((y[i][j-1] == 0) & (y[i][j+1] == 0)) or ((y[i-1][j] == 0) & (y[i+1][j] == 0)):
                y[i][j] = 0
            else:
                y[i][j] = (y[i][j-1] + y[i][j+1]) / 2 + (y[i-1][j] + y[i+1][j]) / 10
    cv2.imwrite("processed.png", y)


if __name__ == "__main__":
    load_data(data_dir)
    dimension = len(data[0])
    nn = model.Model(dimension).double()
    # Initialize weights uniformly from -1 to 1
    nn.apply(weights_init_uniform)
    train_loader, val_loader = split_load_data(data, labels)
    # train_save(nn, train_loader, val_loader)
    predict_bitmapping(nn, weights='weights.pt', x=input,
                       resulution=(50, 53), threshold=False, denoise=False)
    denoise('bitmapping.csv', (50, 53))
    # result = mask('ml/prediction_Kevin.png', mask_threshold)
    # io.imshow(result)
    # plt.show()
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/GZ_EMNLP2016/semeval_0.001_16',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # Load the dataset (alternative corpora kept for reference):
    # data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_allwords_data_set.pkl'
    # emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'
    # data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    # emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'
    # data_set_file = 'data/ACL2017/nus/nus_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/nus/nus_t_a_GZ_embedding.pkl'
    data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'

    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)

    test_lex, test_y, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        logfile = open(str(s['check_dir']) + '/predict_log_NEW.txt', 'a',
                       encoding='utf-8')
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            logfile.write(str(ckpt.model_checkpoint_path) + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: s['keep_prob']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        predictions_test = []
        groundtruth_test = []
        start_num = 0
        steps = len(test_lex) // s['batch_size']
        print('testing............')
        for step in range(steps):
            x, z = test_batch_putin(test_lex, test_z,
                                    start_num=start_num,
                                    batch_size=s['batch_size'])
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)
            start_num += s['batch_size']
            if step % 100 == 0:
                print('tested %d batch......' % step)

        print('dataset: ' + data_set_file)
        logfile.write('dataset: ' + data_set_file + '\n')
        print("result:")
        logfile.write("result:\n")
        res_test = tools.conlleval(predictions_test, groundtruth_test)
        print('all: ', res_test)
        logfile.write('all: ' + str(res_test) + '\n')
        res_test_top5 = tools.conlleval_top(predictions_test,
                                            groundtruth_test, 5)
        print('top5: ', res_test_top5)
        logfile.write('top5: ' + str(res_test_top5) + '\n')
        res_test_top10 = tools.conlleval_top(predictions_test,
                                             groundtruth_test, 10)
        print('top10: ', res_test_top10)
        logfile.write('top10: ' + str(res_test_top10) + '\n')
        logfile.write('-' * 119 + '\n')
        logfile.close()
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.01,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/GZ_EMNLP2016/semeval_0.001_16OLD',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # Load the dataset.
    # data_set_file = 'CNTN/data/inspec_wo_stem/data_set.pkl'
    # emb_file = 'CNTN/data/inspec_wo_stem/embedding.pkl'
    data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    testPath = 'data/ACL2017/semeval/semeval_test.json'
    # data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    # emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'

    logFile = open('data/logEMNLP2016.txt', 'w', encoding='utf-8')

    testJsonFile = open(testPath, 'r', encoding='utf-8')
    testLines = testJsonFile.readlines()
    testJsonFile.close()

    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    test_lex, test_y, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # print(ckpt.all_model_checkpoint_paths[4])
            print(ckpt.model_checkpoint_path)
            logFile.write(ckpt.model_checkpoint_path + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: s['keep_prob']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        predictions_test = []
        groundtruth_test = []
        start_num = 0
        indexInBatch = 0
        steps = len(test_lex) // s['batch_size']
        print('testing............')
        logFile.write('testing............\n')
        # NOTE: only the first 6 batches are evaluated here, not all `steps`.
        for step in range(6):
            x, z = test_batch_putin(test_lex, test_z,
                                    start_num=start_num,
                                    batch_size=s['batch_size'])
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)
            start_num += s['batch_size']
            if step % 100 == 0:
                print('tested %d batch......' % (step // 100))
                logFile.write('tested %d batch......\n' % (step // 100))

        # Decode the predicted tag sequences back into keyphrases and log them.
        while indexInBatch < len(predictions_test):
            curGoodNum = 0
            curPreKp = []
            curJsonData = json.loads(testLines[indexInBatch])
            curLineList = nltk.word_tokenize(
                curJsonData["title"].strip().lower() + ' ' +
                curJsonData["abstract"].strip().lower())
            print('indexOfLine is :', indexInBatch)
            print('len of curLineList is %d' % len(curLineList))
            print('len of predictions_test[%d] is %d' %
                  (indexInBatch, len(predictions_test[indexInBatch])))
            print('len of groundtruth_test[%d] is %d' %
                  (indexInBatch, len(groundtruth_test[indexInBatch])))
            lenOfLine = min(len(predictions_test[indexInBatch]),
                            len(groundtruth_test[indexInBatch]),
                            len(curLineList))
            print(predictions_test[indexInBatch])
            print(groundtruth_test[indexInBatch])
            logFile.write('indexOfLine is : %s \n' % indexInBatch)
            logFile.write('len of curLineList is %d \n' % len(curLineList))
            logFile.write('len of predictions_test[%d] is %d \n' %
                          (indexInBatch, len(predictions_test[indexInBatch])))
            logFile.write('len of groundtruth_test[%d] is %d \n' %
                          (indexInBatch, len(groundtruth_test[indexInBatch])))
            logFile.write(str(predictions_test[indexInBatch]) + '\n')
            logFile.write(str(groundtruth_test[indexInBatch]) + '\n')

            tmpStack = []
            for j in range(lenOfLine):
                # Tags appear to encode: 1 = begin, 2 = inside, 3 = end,
                # 4 = single-word keyphrase, 0 = outside.
                if predictions_test[indexInBatch][j] == 4:
                    curPreKp.append(curLineList[j])
                    tmpStack = []
                elif predictions_test[indexInBatch][j] == 1 and len(tmpStack) == 0:
                    tmpStack.append(curLineList[j])
                elif predictions_test[indexInBatch][j] == 2 and len(tmpStack) != 0:
                    tmpStack.append(curLineList[j])
                elif predictions_test[indexInBatch][j] == 3 and len(tmpStack) != 0:
                    tmpStack.append(curLineList[j])
                    curPreKp.append(' '.join(tmpStack))
                    tmpStack = []
                else:
                    tmpStack = []
                if (predictions_test[indexInBatch][j] != 0 and
                        predictions_test[indexInBatch][j] ==
                        groundtruth_test[indexInBatch][j]):
                    curGoodNum += 1

            print('curGoodNum is ', curGoodNum)
            print('predict keyphrase is :', curPreKp)
            print('ground truth is :', curJsonData['keywords'].split(';'))
            print('=' * 135)
            logFile.write('curGoodNum is %d \n' % curGoodNum)
            logFile.write('predict keyphrase is ' + str(curPreKp) + '\n')
            logFile.write('ground truth is :' +
                          str(curJsonData['keywords'].split(';')) + '\n')
            logFile.write('=' * 135 + '\n')
            indexInBatch += 1

        logFile.close()
def main(_):
    # Set logging verbosity for training output.
    tf.logging.set_verbosity(tf.logging.INFO)

    ########################################
    # Check folders and dataset paths      #
    ########################################
    prepare_training_dir()

    ########################################
    # Dataset preparation                  #
    ########################################
    charset = read_charset(
        os.path.join(FLAGS.path_dataset_root,
                     DEFAULT_CONFIG['charset_filename']))
    print('chinese dict is as follows:')
    print(json.dumps(charset, ensure_ascii=False))
    train_image_batch, train_label_batch, tfrecord_files = batch_input(
        'train', FLAGS.train_batch_size, len(charset),
        FLAGS.path_dataset_root, None)
    train_one_hot = slim.one_hot_encoding(train_label_batch, len(charset))
    # val_image_batch, val_label_batch = batch_input(
    #     'val', FLAGS.val_batch_size, len(charset),
    #     FLAGS.path_dataset_root, None)

    ########################################
    # Model construction                   #
    ########################################
    shape_img = DEFAULT_CONFIG['image_shape']
    max_sequence_length = DEFAULT_CONFIG['max_sequence_length']
    pl_image = tf.placeholder(
        tf.float32,
        shape=[None, shape_img[0], shape_img[1], shape_img[2]],
        name='pl_image')
    pl_label = tf.placeholder(tf.int64,
                              shape=[None, max_sequence_length],
                              name='pl_label')
    one_hot_label = slim.one_hot_encoding(pl_label, len(charset))
    ocr_model = model.Model(num_char_classes=len(charset),
                            seq_length=DEFAULT_CONFIG['max_sequence_length'],
                            num_views=DEFAULT_CONFIG['num_of_views'],
                            null_code=DEFAULT_CONFIG['null_code'],
                            mparams=create_mparams())
    endpoints = ocr_model.create_base(pl_image, one_hot_label)
    chars_logit = endpoints.chars_logit
    predicted_text = endpoints.predicted_text
    total_loss = ocr_model.create_loss_v2(pl_label, endpoints)

    ########################################
    # Optimizer configuration              #
    ########################################
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False,
                                  dtype=tf.int32)
    optimizer = create_optimizer()
    grads = optimizer.compute_gradients(total_loss)
    train_op = optimizer.apply_gradients(grads, global_step=global_step)

    ########################################
    # Configure and start training         #
    ########################################
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Initialize weights.
    sess.run(tf.local_variables_initializer())
    step = _init_weight(sess)
    print('tfrecord files for training: {}'.format(sess.run(tfrecord_files)))

    # Thread coordinator for the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Total number of training samples.
    num_per_epoch = DEFAULT_CONFIG['splits']['train']['size']
    num_per_step = FLAGS.train_batch_size
    epoch = step * num_per_step // num_per_epoch
    while epoch < FLAGS.num_epochs:
        batch_img_train, batch_label_train = sess.run(
            [train_image_batch, train_label_batch])

        # (Validation-set evaluation could go here.)

        # Display training progress every `display_step` steps.
        if step % FLAGS.display_step == 0:
            _ = sess.run(train_op,
                         feed_dict={pl_image: batch_img_train,
                                    pl_label: batch_label_train})
            loss_train = sess.run(total_loss,
                                  feed_dict={pl_image: batch_img_train,
                                             pl_label: batch_label_train})
            print('epoch: {}, step: {}, train_loss: {}'.format(
                epoch, step, loss_train))
        else:
            _ = sess.run(train_op,
                         feed_dict={pl_image: batch_img_train,
                                    pl_label: batch_label_train})

        # Save a model checkpoint periodically.
        if step % FLAGS.save_step == 0:
            checkpoint_path = os.path.join(FLAGS.train_log_dir,
                                           'ocr_chinese_model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
            print('************save model at {} steps'.format(step))

        step += 1
        epoch = step * num_per_step // num_per_epoch

    coord.request_stop()
    coord.join(threads)
    sess.close()
from train_OCR import create_mparams

FLAGS = tf.app.flags.FLAGS

"""Evaluate the model from a ckpt checkpoint file."""
if __name__ == '__main__':
    path_img = './dataset_generate/data_sample/20455828_2605100732.jpg'
    path_ckpt = './train_logs/ocr_chinese_model.ckpt-1090000'

    charset = read_charset(
        os.path.join(FLAGS.path_dataset_root,
                     DEFAULT_CONFIG['charset_filename']))
    ocr_model = model.Model(num_char_classes=len(charset),
                            seq_length=DEFAULT_CONFIG['max_sequence_length'],
                            num_views=DEFAULT_CONFIG['num_of_views'],
                            null_code=DEFAULT_CONFIG['null_code'],
                            mparams=create_mparams(),
                            charset=charset)
    shape_img = DEFAULT_CONFIG['image_shape']
    max_sequence_length = DEFAULT_CONFIG['max_sequence_length']
    pl_image = tf.placeholder(
        tf.float32,
        shape=[None, shape_img[0], shape_img[1], shape_img[2]],
        name='pl_image')
    endpoints = ocr_model.create_base(pl_image, labels_one_hot=None)
    init_fn = ocr_model.create_init_fn_to_restore(path_ckpt)

    resize_height = DEFAULT_CONFIG['image_shape'][0]
    resize_width = DEFAULT_CONFIG['image_shape'][1]
    img = Image.open(path_img)
    img = np.array(img.resize((resize_width, resize_height), Image.ANTIALIAS))
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints',
        'display_test_per': 5,
        'lr_decay_per': 10
    }

    # Load the dataset.
    train_set, test_set, dic, embedding = load.atisfold()
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)

    test_lex, test_y, test_z = test_set[0:1000]

    y_nclasses = 2
    z_nclasses = 5

    with tf.Session() as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        print("Test results:")  # was mojibake for Chinese "test results"
        predictions_test = []
        groundtruth_test = []
        for batch in tl.iterate.minibatches(test_lex, test_z,
                                            batch_size=s['batch_size']):
            x, z = batch
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)

        res_test = tools.conlleval(predictions_test, groundtruth_test, '')
        print(res_test)  # Python 3 print(); the original used Python 2 syntax
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 150,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints',
        'display_test_per': 3,
        'lr_decay_per': 10
    }

    train_set, test_set, dic, embedding = load.atisfold()
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    # Hold out the last 10% of the training data for validation.
    train_lex, train_y, train_z = train_set
    tr = int(len(train_lex) * 0.9)
    valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]
    train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    test_lex, test_y, test_z = test_set

    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")

    y_nclasses = 2
    z_nclasses = 5
    nsentences = len(train_lex)

    with tf.Session() as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob'],
                rnn.batch_size: s['batch_size']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None

        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            for step, batch in enumerate(
                    tl.iterate.minibatches(train_lex,
                                           list(zip(train_y, train_z)),
                                           batch_size=s['batch_size'])):
                input_x, target = batch
                label_y, label_z = zip(*target)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                loss = train_step(cwords, label_y, label_z)
                print('loss %.2f' % loss,
                      ' [learning] epoch %i>> %2.2f%%' %
                      (e, s['batch_size'] * step * 100. / nsentences),
                      'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                sys.stdout.flush()

            # VALID
            predictions_valid = []
            predictions_test = []
            groundtruth_valid = []
            groundtruth_test = []
            for batch in tl.iterate.minibatches(valid_lex, valid_z,
                                                batch_size=s['batch_size']):
                x, z = batch
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions_valid.extend(dev_step(x))
                groundtruth_valid.extend(z)

            res_valid = tools.conlleval(predictions_valid, groundtruth_valid, '')
            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                path = saver.save(sess=sess, save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
            else:
                print('\nVALID new curr:', res_valid)

            # TEST
            if e % s['display_test_per'] == 0:
                for batch in tl.iterate.minibatches(test_lex, test_z,
                                                    batch_size=s['batch_size']):
                    x, z = batch
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_test.extend(dev_step(x))
                    groundtruth_test.extend(z)
                res_test = tools.conlleval(predictions_test,
                                           groundtruth_test, '')
                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                else:
                    print('TEST new curr:', res_test)

            # Learning-rate decay if no improvement in `lr_decay_per` epochs.
            if e - best_e > s['lr_decay_per']:
                sess.run(fetches=rnn.learning_rate_decay_op)
                lr = sess.run(fetches=rnn.lr)
                print('learning rate:%f' % lr)
                if lr < 1e-5:
                    break

        print()
        print("Train finished!")
        print('Valid Best Result: epoch %d: ' % (best_e), best_res)
        print('Test Best Result: epoch %d: ' % (test_best_e), test_best_res)
from models import model
from handlers import config_pattern


class Application(web.Application):
    def __init__(self, model):
        self.model = model
        # Config settings from options.
        settings = dict(
            template_path=os.path.join(os.path.dirname(__file__), "templates"),
            cookie_secret=options.cookie_secret,
            static_path=os.path.join(os.path.dirname(__file__), "static"))
        # Set up DB (disabled):
        # self.db = torndb.Connection(
        #     "%s:%s" % (options.mysql["host"], options.mysql["port"]),
        #     options.mysql["database"], user=options.mysql["user"],
        #     password=options.mysql["password"], charset='utf8')
        # Handler patterns.
        handlers = config_pattern.handlersPattern
        super(Application, self).__init__(handlers, **settings)


if __name__ == '__main__':
    myOptions.parse_options()
    app = Application(model.Model())
    app.listen(options.port)
    IOLoop.instance().start()
def resume(self, url, image_recognition):
    # The second positional argument appears to be a resume flag (True = resume).
    m = model.Model()
    m.process_website(url, True, image_recognition)
def start(self, url, image_recognition):
    # Same call as resume(), but with the resume flag off (fresh crawl).
    m = model.Model()
    m.process_website(url, False, image_recognition)
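The two entry points above differ only in a boolean. A hedged consolidation sketch; the helper name _run is my own, and it assumes the second positional argument of process_website is indeed a resume flag:

def _run(self, url, image_recognition, resume=False):
    m = model.Model()
    m.process_website(url, resume, image_recognition)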
    collate_fn=dataset.alignCollate())

if args.resume is not None:
    print('loading pretrained class from {}'.format(args.resume))
    checkpoint = torch.load(args.resume,
                            map_location=lambda storage, loc: storage)
    args.alphabet = checkpoint['alphabet']
    del checkpoint
else:
    args.alphabet = util.get_vocab(root=args.root, label=args.train_label)

args.num_class = len(args.alphabet) + 1  # +1 for the CTC blank label
converter = convert.strLabelConverter(args.alphabet)

# NOTE: the second assignment immediately overrides the first (and the first
# rebinds the `model` module name), so only dcrnn.Model is actually used.
model = model.Model(num_classes=args.num_class, fixed_height=args.height,
                    net=args.net)
model = dcrnn.Model(n_classes=args.num_class, fixed_height=args.height)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate,
                       betas=(0.5, 0.999))

if args.resume is not None:
    print('loading pretrained model from {}'.format(args.resume))
    checkpoint = torch.load(args.resume,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    del checkpoint

criterion = CTCLoss()
global image, text, length
#         if lst1[i] and lst2[i]:  # same and 1
#             count += 1
#         elif (not lst1[i] and lst2[i]) or (lst1[i] and not lst2[i]):  # one of them has 1 and diff
#             diff += 1
#     return count / diff


def jaccard(lst1, lst2):
    """Jaccard similarity of two binary vectors: |A and B| / |A or B|."""
    lst1 = np.asarray(lst1)
    lst2 = np.asarray(lst2)
    return np.double(np.bitwise_and(lst1, lst2).sum()) / np.double(
        np.bitwise_or(lst1, lst2).sum())


if __name__ == '__main__':
    reuterdicpath = os.path.dirname(os.path.dirname(
        os.path.realpath(__file__))) + "\\dictionaryBuilding\\reutersdic.json"
    with open(reuterdicpath, 'r') as f:
        dic = json.load(f)
    inverted_index = model.Model('vsm').buildIndex(dic)
    print(len(inverted_index))
    expander = QueryExpansion(dic, inverted_index)
    # with open('unique_words.json', 'w') as f:
    #     words = expander.unique_words()
    #     json.dump(words, f)
    # with open('docvec.json', 'w') as f:
    #     json.dump(expander.get_doc_vector(), f)
    # expander.build_thesaurus()
    # print(expander.build_thesaurus())
    with open('reuters_theaurus.json', 'w') as f:
        # json.dump(expander.build_thesaurus(), f)
        pass  # body is commented out; `pass` keeps the `with` block valid
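A quick sanity check of jaccard() on toy binary vectors:

# intersection = [1,0,0,0] (sum 1); union = [1,1,1,0] (sum 3) -> 1/3
a = [1, 0, 1, 0]
b = [1, 1, 0, 0]
print(jaccard(a, b))  # 0.333...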
def test_get_client(self):
    # Smoke test: the client can be created and a query can be fetched.
    # The original asserted the vacuous `1 < 2`.
    ds = model.Model().get_client()
    query = ds.query(kind='courses')
    result = list(query.fetch())
    assert isinstance(result, list)
import copy
# from tqdm import tqdm

config = DefaultConfig()

if config.use_hyperboard:
    from hyperboard import Agent
    agent = Agent(username='******', password='******', port=5005)
    parameter = config.todict()
    validate_loss_record = agent.register(parameter, 'loss', overwrite=True)

train_dataset = dataset.MyDataset()
validate_dataset = dataset.MyDataset()

criticer = torch.nn.MSELoss()
model = model.Model()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
if config.gpu >= 0:
    model.cuda(config.gpu)

max_loss = 0
no_gain = 0
global_step = 0
train_num = len(train_dataset)

model.train()
for epoch in range(config.epoch_num):
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True)
def create_model(config):
    net = model.Model(config.n_classes,
                      feature_extractor=config.feature_extractor,
                      metric_learning=config.metric_learning)
    return net
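A hypothetical call, assuming a namespace-style config; the attribute values below are illustrative, not from the source:

from types import SimpleNamespace

config = SimpleNamespace(n_classes=10,
                         feature_extractor='resnet18',
                         metric_learning=False)
net = create_model(config)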
def main():
    # The stray '#' markers in the original config were flattened trailing
    # comments; all of these keys are used later, so they stay active.
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.01,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints/GZ_EMNLP2016/kp20k_0.01_16',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # Alternative corpora (kept for reference):
    # data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/nus/nus_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/nus/nus_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_embedding.pkl'
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'

    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)

    # train_lex: [[word indices of each tweet]],
    # train_y: [[1 at keyphrase positions]],
    # train_z: [[tags 0-4 at keyphrase positions (begin, end, ...)]]
    train_lex, train_y, train_z = train_set
    valid_lex, valid_y, valid_z = valid_set
    test_lex, test_y, test_z = test_set

    log_dir = s['check_dir']
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logfile = open(str(s['check_dir']) + '/log.txt', 'a',
                   encoding='utf-8', buffering=1)

    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))
    logfile.write('len(train_data) {}\n'.format(len(train_lex)))
    logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
    logfile.write('len(test_data) {}\n'.format(len(test_lex)))

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")
    logfile.write('len(vocab) {}\n'.format(vocsize))
    logfile.write("Train started!\n")

    y_nclasses = 2
    z_nclasses = 5
    nsentences = len(train_lex)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.compat.v1.Session(config=config) as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')
        # my_model = mymodel.myModel(
        #     # nh1=s['nh1'],
        #     # nh2=s['nh2'],
        #     # ny=y_nclasses,
        #     # nz=z_nclasses,
        #     de=s['emb_dimension'],
        #     lr=s['lr'],
        #     lr_decay=s['lr_decay'],
        #     embedding=embedding,
        #     max_gradient_norm=s['max_grad_norm'],
        #     keep_prob=s['keep_prob'],
        #     model_cell='lstm'
        # )

        # Checkpointing.
        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None

        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            start_num = 0
            steps = len(train_lex) // s['batch_size']
            for step in range(steps):
                input_x, label_y, label_z = train_batch_putin(
                    train_lex, train_y, train_z,
                    start_num=start_num, batch_size=s['batch_size'])
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                loss = train_step(cwords, label_y, label_z)
                start_num += s['batch_size']
                print('loss %.6f' % loss,
                      ' [learning] epoch %i>> %2.2f%%' %
                      (e, s['batch_size'] * step * 100. / nsentences),
                      'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(' [learning] epoch %i>> %2.2f%%' %
                                  (e, s['batch_size'] * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))

            # VALID
            if e >= 0:
                print('Validing..............')
                predictions_valid = []
                predictions_test = []
                groundtruth_valid = []
                groundtruth_test = []
                start_num = 0
                steps = len(valid_lex) // s['batch_size']
                for step in range(steps):
                    x, z = test_batch_putin(valid_lex, valid_z,
                                            start_num=start_num,
                                            batch_size=s['batch_size'])
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_valid.extend(dev_step(x))
                    groundtruth_valid.extend(z)
                    start_num += s['batch_size']

                res_valid = tools.conlleval(predictions_valid, groundtruth_valid)
                del predictions_valid
                del groundtruth_valid

                if res_valid['f'] > best_f:
                    best_f = res_valid['f']
                    best_e = e
                    best_res = res_valid
                    print('\nVALID new best:', res_valid)
                    logfile.write('\nVALID new best: ' + str(res_valid))
                    path = saver.save(sess=sess, save_path=checkpoint_prefix,
                                      global_step=e)
                    print("Save model checkpoint to {}".format(path))
                    logfile.write("\nSave model checkpoint to {}\n".format(path))
                else:
                    print('\nVALID new curr:', res_valid)
                    logfile.write('\nVALID new curr: ' + str(res_valid))

                # TEST
                print('Testing..............')
                start_num = 0
                steps = len(test_lex) // s['batch_size']
                if e % s['display_test_per'] == 0:
                    for step in range(steps):
                        x, z = test_batch_putin(test_lex, test_z,
                                                start_num=start_num,
                                                batch_size=s['batch_size'])
                        x = load.pad_sentences(x)
                        x = tools.contextwin_2(x, s['win'])
                        predictions_test.extend(dev_step(x))
                        groundtruth_test.extend(z)
                        start_num += s['batch_size']
                    res_test = tools.conlleval(predictions_test,
                                               groundtruth_test)
                    if res_test['f'] > test_best_f:
                        test_best_f = res_test['f']
                        test_best_e = e
                        test_best_res = res_test
                        print('TEST new best:', res_test)
                        logfile.write('\nTEST new best: ' + str(res_test))
                    else:
                        print('TEST new curr:', res_test)
                        logfile.write('\nTEST new curr: ' + str(res_test))

                # Learning-rate decay if no improvement in `lr_decay_per` epochs.
                if e - best_e > s['lr_decay_per']:
                    sess.run(fetches=rnn.learning_rate_decay_op)
                    lr = sess.run(fetches=rnn.lr)
                    print('learning rate:%f' % lr)
                    logfile.write('\nlearning rate:%f\n' % lr)
                    if lr < 1e-6:
                        break

        print("Train finished!")
        print('Valid Best Result: epoch %d: ' % (best_e), best_res)
        print('Test Best Result: epoch %d: ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d: ' % (best_e) + str(best_res))
        logfile.write('\nTest Best Result: epoch %d: ' % (test_best_e) +
                      str(test_best_res))
        logfile.close()