Example No. 1
def filled_db():
    imhere.app.secret_key = str(uuid.uuid4())
    
    um = users_model.Users()
    um.get_or_create_user(stu)
    um.get_or_create_user(newt)

    m = model.Model()
    ds = m.get_client()
        

    key = ds.key('student')
    entity = datastore.Entity(
        key=key)
    entity.update({
        'sid': stu['id'],
        'uni': 'cs4156'
    })
    ds.put(entity)

    key = ds.key('teacher')
    entity = datastore.Entity(
        key=key)
    entity.update({
        'tid': newt['id']
    })
    ds.put(entity)

    tm = teachers_model.Teachers(newt['id'])
    course_name = 'Writing'
    cid = tm.add_course(course_name)
    cm = courses_model.Courses(cid)
    cm.add_student('cs4156')
    yield cid
Example No. 2
    def post(self):
        print(self.request)
        draw = self.json_args['draw']
        start = self.json_args['start']
        length = self.json_args['length']
        order = self.json_args['order'][0]['dir']
        ind_column = self.json_args['order'][0]['column']
        col_name = self.json_args['columns'][ind_column]['data']

        search_data = {}
        search_data['from'] = self.json_args['from']
        search_data['to'] = self.json_args['to']
        search_data['day'] = self.json_args['day']

        if len([v for v in search_data.values() if v is not None]) < 3:
            search_data = None

        flights, total_num, filtered = yield tm.list_flights(
            search_data, col_name, start, length, order)
        response = {
            'draw': draw,
            'recordsTotal': total_num,
            'recordsFiltered': filtered,
            'data': flights
        }
        # response = json.dumps(response, default=utils.json_serial)
        # print countries
        # countries = "?"
        self.set_header('Content-Type', 'text/javascript;charset=utf-8')
        self.write(model.Model(response).json())
Example No. 3
def main():
    """Main function"""
    args = parse_args()

    smiles_list = uc.read_smi_file(args.input_smiles_path)

    LOG.info("Building vocabulary")
    tokenizer = mv.SMILESTokenizer()
    vocabulary = mv.create_vocabulary(smiles_list, tokenizer=tokenizer)

    tokens = vocabulary.tokens()
    LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)
    network_params = {
        'num_layers': args.num_layers,
        'layer_size': args.layer_size,
        'cell_type': args.cell_type,
        'embedding_layer_size': args.embedding_layer_size,
        'dropout': args.dropout
    }
    model = mm.Model(no_cuda=True,
                     vocabulary=vocabulary,
                     tokenizer=tokenizer,
                     network_params=network_params,
                     max_sequence_length=args.max_sequence_length)
    LOG.info("Saving model at %s", args.output_model_path)
    model.save(args.output_model_path)
Example No. 4
    def testAverageGradients(self):
        """
        Checks the correct average for multiple towers and multiple variables.

        The test model has 2 towers with 2 variables shared between them.
        var_0 is getting 1.0 + 3.0 as gradient -> average: 2.0
        var_1 is getting 2.0 + 4.0 as gradient -> average: 3.0
        """
        with tf.Graph().as_default():
            with tf.Session() as session:
                test_model = model.Model(self.hparams)
                grad_0 = tf.constant(1.0)
                grad_1 = tf.constant(2.0)
                tower_0 = [(grad_0, 'var_0'), (grad_1, 'var_1')]
                grad_2 = tf.constant(3.0)
                grad_3 = tf.constant(4.0)
                tower_1 = [(grad_2, 'var_0'), (grad_3, 'var_1')]
                tower_grads = [tower_0, tower_1]
                average_grads = test_model._average_gradients(tower_grads)
                self.assertEqual(len(average_grads), 2)
                self.assertEqual('var_0', average_grads[0][1])
                average_grad_0 = session.run(average_grads[0][0])
                self.assertEqual(2.0, average_grad_0)

                self.assertEqual('var_1', average_grads[1][1])
                average_grad_1 = session.run(average_grads[1][0])
                self.assertEqual(3.0, average_grad_1)
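
The docstring above spells out the expected per-variable averaging across towers. For reference, a minimal sketch of such a gradient-averaging helper (an illustration assuming TF1-style lists of (gradient, variable) pairs, not the implementation under test):

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one list of (grad, var) pairs per tower; variables are shared.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grad_and_vars], axis=0)
        avg_grad = tf.reduce_mean(grads, axis=0)
        # Every tower refers to the same variable, so take it from the first tower.
        averaged.append((avg_grad, grad_and_vars[0][1]))
    return averaged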
Example No. 5
    def delete_session(self, del_seid):
        ds = model.Model().get_client()
        query = ds.query(kind='sessions')
        query.add_filter('seid', '=', int(del_seid))
        results = list(query.fetch())
        for result in results:
            # delete each matching entity by its own key
            ds.delete(result.key)
Example No. 6
    def upper_partition_course_entity(self):
        ds = model.Model().get_client()
        key = ds.key('courses')
        entity = datastore.Entity(key=key)
        entity.update({'name': "Strings123"})
        ds.put(entity)
        cid = int(entity.key.id)
        entity.update({'name': "Strings123", 'cid': int(cid)})
        ds.put(entity)
        return entity
Example No. 7
    def __init__(self, config_data):
        self._id = config_data['id']
        self._models = {}
        self._settings = {}

        for id, settingConfig in config_data['settings'].items():
            self._settings[id] = typeMap.typeMap[settingConfig['type']](settingConfig)

        for id, modelConfig in config_data['models'].items():
            self._models[id] = model.Model(modelConfig)
Example No. 8
    def run(self):
        """
        Performs the creation of the model.
        """
        if self._already_run:
            return

        LOG.info("Building vocabulary")
        tokenizer = mv.SMILESTokenizer()
        vocabulary = mv.create_vocabulary(self._smiles_list,
                                          tokenizer=tokenizer)

        tokens = vocabulary.tokens()
        LOG.info("Vocabulary contains %d tokens: %s", len(tokens), tokens)
        LOG.info("Saving model at %s", self._output_model_path)
        network_params = {
            'num_layers': self._num_layers,
            'layer_size': self._layer_size,
            'embedding_layer_size': self._embedding_layer_size,
            'dropout': self._dropout,
            'memory_cells': self._memory_cells,
            'cell_size': self._cell_size,
            'read_heads': self._read_heads,
            'num_controller_layers': self._num_controller_layers,
            'controller_type': self._controller_type,
            'model_type': self._model_type
        }
        model = mm.Model(vocabulary=vocabulary,
                         tokenizer=tokenizer,
                         network_params=network_params,
                         model_type=self._model_type,
                         max_sequence_length=self._max_sequence_length)

        model_folder = model.model_name.split('.')[0]
        storage_folder_path = os.path.join(self._output_model_path,
                                           model_folder)
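        # If the target folder already exists, append a numeric "(i)" suffix until it is unique.
        # Note: the [:-3] slice below assumes the previous suffix is a single digit.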
        i = 0
        while os.path.exists(storage_folder_path):
            if i == 0:
                storage_folder_path += '(%s)' % i
            else:
                cut_path = storage_folder_path[:-3]
                storage_folder_path = cut_path + '(%s)' % i
            i += 1

        os.makedirs(storage_folder_path)
        self._output_model_path = os.path.join(storage_folder_path,
                                               model.model_name)
        model.model_dir = storage_folder_path

        model.save(self._output_model_path)
        LOG.info('Model saved!')
        LOG.info(model.__dict__)
Example No. 9
    def test_print(self):
        ds = model.Model().get_client()
        query = ds.query(kind='sessions')
        #query.add_filter('date', '=', self.date)
        #query.add_filter('cid', '=', 68)
        result = list(query.fetch())
        print(
            '\n' +
            'Sessions ================================================================================================='
        )
        for session in result:
            print(session)
Example No. 10
    def test_get_current_roster_size(self):
        ssm = sessions_model.Sessions()
        cid = self.upper_partition_course_entity()['cid']
        seid = ssm.open_session(cid)
        assert ssm.get_current_roster_size() == 0
        ds = model.Model().get_client()
        key = ds.key('enrolled_in')
        entity = datastore.Entity(key=key)
        entity.update({'sid': 653, 'cid': cid})
        ds.put(entity)
        assert ssm.get_current_roster_size() == 1
        self.delete_session(seid)
Example No. 11
    def testModelInitialization(self):
        """Checks the variables declared in the init method.

          Initialization step should only declare global_step as a non
          trainable variable.
        """
        with tf.Graph().as_default():
            model.Model(self.hparams)
            trainable_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES)
            self.assertEqual(len(trainable_vars), 0)
            global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            self.assertEqual(len(global_vars), 1)
            self.assertStartsWith(global_vars[0].name, 'global_step')
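
The test above checks that construction declares only a non-trainable global_step. A minimal sketch of that declaration pattern (an assumption mirroring the explicit global_step in Example No. 18, not the tested model's own code):

import tensorflow as tf

with tf.Graph().as_default():
    global_step = tf.get_variable('global_step', shape=[],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False,  # keeps TRAINABLE_VARIABLES empty
                                  dtype=tf.int32)
    assert len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)) == 0
    assert len(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) == 1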
Example No. 12
def register():
    if request.method == 'GET':
        return render_template(
                'register.html',
                name=flask.session['google_user']['name'],
                is_student=flask.session['is_student'],
                is_teacher=flask.session['is_teacher']
        )

    elif request.method == 'POST':
        m = model.Model()
        ds = m.get_client()
        if request.form['type'] == 'student':
            # check that uni doesn't already exist
            # if it doesn't, continue student creation
            um = users_model.Users()
            if not um.is_valid_uni(request.form['uni']):
                key = ds.key('student')
                entity = datastore.Entity(
                    key=key)
                entity.update({
                    'sid': flask.session['id'],
                    'uni': request.form['uni']
                })
                ds.put(entity)

                flask.session['is_student'] = True
                return flask.redirect(flask.url_for('main_student'))
            else:
                return render_template(
                        'register.html',
                        name=flask.session['google_user']['name'],
                        invalid_uni=True)

        else:
            try:
                key = ds.key('teacher')
                entity = datastore.Entity(
                    key=key)
                entity.update({
                    'tid': flask.session['id']
                })
                ds.put(entity)
                flask.session['is_teacher'] = True
            except:
                pass
            return flask.redirect(flask.url_for('main_teacher'))
Example No. 13
    def run(self):
        """
        Carries out the creation of the model.
        """

        tokenizer = voc.SMILESTokenizer()
        vocabulary = voc.create_vocabulary(self._smiles_list, tokenizer=tokenizer)

        network_params = {
            'num_layers': self._num_layers,
            'layer_size': self._layer_size,
            'cell_type': self._cell_type,
            'embedding_layer_size': self._embedding_layer_size,
            'dropout': self._dropout,
            'layer_normalization': self._layer_normalization
        }
        model = reinvent.Model(no_cuda=True, vocabulary=vocabulary, tokenizer=tokenizer, network_params=network_params, max_sequence_length=self._max_sequence_length)
        model.save(self._output_model_path)
        return model
Example No. 14
    def lower_partition_session_entity(self):
        ds = model.Model().get_client()
        key = ds.key('sessions')
        entity = datastore.Entity(key=key)
        entity.update({
            'date': '',
            'cid': 99999999,
            'window_open': False,
            'self.secret': -1
        })
        ds.put(entity)
        seid = int(entity.key.id)
        entity.update({
            'date': '',
            'cid': 99999999,
            'window_open': False,
            'self.secret': -1,
            'seid': int(seid)
        })
        ds.put(entity)
        return entity
Example No. 15
    for i in range(1, shape[0]-1):
        for j in range(1, shape[1]-1):
            if ((y[i][j-1] == 0) & (y[i][j+1] == 0)) or ((y[i-1][j] == 0) & (y[i+1][j] == 0)):
                y[i][j] = 0

            else:
                y[i][j] = (y[i][j-1] + y[i][j+1]) / 2 + (y[i-1][j] + y[i+1][j])/10

    cv2.imwrite("processed.png", y)


if __name__ == "__main__":
    load_data(data_dir)
    dimension = len(data[0])

    nn = model.Model(dimension).double()

    # Initialize weights uniformly from -1 to 1
    nn.apply(weights_init_uniform)

    train_loader, val_loader = split_load_data(data, labels)

    # train_save(nn, train_loader, val_loader)
    predict_bitmapping(nn, weights='weights.pt', x=input, resulution=(50, 53),
                       threshold=False, denoise=False)

    denoise('bitmapping.csv', (50, 53))

    # result = mask('ml/prediction_Kevin.png', mask_threshold)
    # io.imshow(result)
    # plt.show()
Example No. 16
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/GZ_EMNLP2016/semeval_0.001_16',
        'display_test_per': 1,
        'lr_decay_per': 5
    }

    # load the dataset
    #data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_allwords_data_set.pkl'
    # emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'
    # data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_embedding.pkl'
    #data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    #emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'
    data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    # train_set, test_set, dic, embedding = load.atisfold(data_set_file, emb_file)
    #data_set_file = 'data/ACL2017/nus/nus_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/nus/nus_t_a_GZ_embedding.pkl'
    # train_set, test_set, dic, embedding = load.atisfold(data_set_file, emb_file)
    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # idx2label = dict((k,v) for v,k in dic['labels2idx'].items())
    # idx2word  = dict((k,v) for v,k in dic['words2idx'].items())

    # vocab = set(dic['words2idx'].keys())
    # vocsize = len(vocab)
    test_lex, test_y, test_z = test_set
    # test_lex = test_lex[:1000]
    # test_y = test_y[:1000]
    # test_z = test_z[:1000]

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:

        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        logfile = open(str(s['check_dir']) + '/predict_log_NEW.txt',
                       'a',
                       encoding='utf-8')
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            logfile.write(str(ckpt.model_checkpoint_path) + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: s['keep_prob'],
                # rnn.batch_size:s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        predictions_test = []
        groundtruth_test = []
        start_num = 0
        steps = len(test_lex) // s['batch_size']
        # for batch in tl.iterate.minibatches(test_lex, test_z, batch_size=s['batch_size']):
        print('testing............')
        for step in range(steps):
            # batch = batch_putin(test_lex, test_z, start_num=start_num, batch_size=s['batch_size'])
            # x, z = batch
            x, z = test_batch_putin(test_lex,
                                    test_z,
                                    start_num=start_num,
                                    batch_size=s['batch_size'])
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)
            start_num += s['batch_size']
            if step % 100 == 0:
                print('tested %d batch......' % step)

        print('dataset: ' + data_set_file)
        logfile.write('dataset: ' + data_set_file + '\n')
        print("result:")
        logfile.write("result:\n")
        # res_test = tools.conlleval(predictions_test, groundtruth_test)
        res_test = tools.conlleval(predictions_test, groundtruth_test)
        print('all: ', res_test)
        logfile.write('all: ' + str(res_test) + '\n')
        res_test_top5 = tools.conlleval_top(predictions_test, groundtruth_test,
                                            5)
        print('top5: ', res_test_top5)
        logfile.write('top5: ' + str(res_test_top5) + '\n')
        res_test_top10 = tools.conlleval_top(predictions_test,
                                             groundtruth_test, 10)
        print('top10: ', res_test_top10)
        logfile.write('top10: ' + str(res_test_top10) + '\n')
        logfile.write(
            '-----------------------------------------------------------------------------------------------------------------------'
            + '\n')
    logfile.close()
Example No. 17
def main():
    s = {
        'nh1':300,
        'nh2':300,
        'win':3,
        'emb_dimension':300,
        'lr':0.01,
        'lr_decay':0.5,
        'max_grad_norm':5,
        'seed':345,
        'nepochs':50,
        'batch_size':16,
        'keep_prob':1.0,
        'check_dir':'./checkpoints/GZ_EMNLP2016/semeval_0.001_16OLD',
        'display_test_per':1,
        'lr_decay_per':5
    }

    # load the dataset
    # data_set_file = 'CNTN/data/inspec_wo_stem/data_set.pkl'
    # emb_file = 'CNTN/data/inspec_wo_stem/embedding.pkl'
    data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    testPath = 'data/ACL2017/semeval/semeval_test.json'
    #data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    #emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'
    logFile = open('data/logEMNLP2016.txt', 'w', encoding='utf-8')
    # train_set, test_set, dic, embedding = load.atisfold(data_set_file, emb_file)
    testJsonFile = open(testPath, 'r', encoding='utf-8')
    testLines = testJsonFile.readlines()
    testJsonFile.close()
    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(data_set_file, emb_file)
    test_lex, test_y, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False, allow_soft_placement=True)  ###########################################
    with tf.Session(config=config) as sess:
        rnn = model.Model(
            nh1=s['nh1'],
            nh2=s['nh2'],
            ny=y_nclasses,
            nz=z_nclasses,
            de=s['emb_dimension'],
            cs=s['win'],
            lr=s['lr'],
            lr_decay=s['lr_decay'],
            embedding=embedding,
            max_gradient_norm=s['max_grad_norm'],
            batch_size=s['batch_size'],
            model_cell='lstm'
        )

        checkpoint_dir = s['check_dir']
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # print(ckpt.all_model_checkpoint_paths[4])
            print(ckpt.model_checkpoint_path)
            logFile.write(ckpt.model_checkpoint_path + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed={
                rnn.input_x:cwords,
                rnn.keep_prob: s['keep_prob'],
                # rnn.batch_size:s['batch_size']
            }
            fetches=rnn.sz_pred
            sz_pred=sess.run(fetches=fetches,feed_dict=feed)
            return sz_pred


        predictions_test = []
        groundtruth_test = []
        start_num = 0
        indexInBatch = 0
        steps = len(test_lex) // s['batch_size']
        # for batch in tl.iterate.minibatches(test_lex, test_z, batch_size=s['batch_size']):
        print('testing............')
        logFile.write('testing............\n')
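        # Note: only the first 6 batches are evaluated here; the full 'steps' count computed above is unused.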
        for step in range(6):
            x, z = test_batch_putin(test_lex, test_z, start_num=start_num, batch_size=s['batch_size'])
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)
            start_num += s['batch_size']
            if step % 100 == 0:
                print('tested %d batch......' % (step//100))
                logFile.write('tested %d batch......\n' % (step//100))
            while indexInBatch < len(predictions_test):
                curGoodNum = 0
                curPreKp = []
                curJsonData = json.loads(testLines[indexInBatch])
                #curLineList = (curJsonData["title"].strip().lower() + ' ' + curJsonData["abstract"].strip().lower()).split(' |,|.|:')
                #curLineList = re.split('[ ,.:]', )
                curLineList = nltk.word_tokenize((curJsonData["title"].strip().lower() + ' ' + curJsonData["abstract"].strip().lower()))
                #curLineList = curJsonData["abstract"].split(' ')
                print('indexOfLine is :', indexInBatch)
                print('len of curLineList is %d' % len(curLineList))
                print('len of predictions_test[%d] is %d' % (indexInBatch, len(predictions_test[indexInBatch])))
                print('len of groundtruth_test[%d] is %d' % (indexInBatch, len(groundtruth_test[indexInBatch])))
                lenOfLine = min(len(predictions_test[indexInBatch]), len(groundtruth_test[indexInBatch]), len(curLineList))
                print(predictions_test[indexInBatch])
                print(groundtruth_test[indexInBatch])

                logFile.write('indexOfLine is : %s \n' % indexInBatch)
                logFile.write('len of curLineList is %d \n' % len(curLineList))
                logFile.write('len of predictions_test[%d] is %d \n' % (indexInBatch, len(predictions_test[indexInBatch])))
                logFile.write('len of groundtruth_test[%d] is %d \n' % (indexInBatch, len(groundtruth_test[indexInBatch])))
                logFile.write(str(predictions_test[indexInBatch]) + '\n')
                logFile.write(str(groundtruth_test[indexInBatch]) + '\n')
                tmpStack = []
                for j in range(lenOfLine):
                    if predictions_test[indexInBatch][j] == 4:
                        curPreKp.append(curLineList[j])
                        tmpStack = []
                    elif predictions_test[indexInBatch][j] == 1 and len(tmpStack) == 0:
                        tmpStack.append(curLineList[j])
                    elif predictions_test[indexInBatch][j] == 2 and len(tmpStack) != 0:
                        tmpStack.append(curLineList[j])
                    elif predictions_test[indexInBatch][j] == 3 and len(tmpStack) != 0:
                        tmpStack.append(curLineList[j])
                        curPreKp.append(' '.join(tmpStack))
                        tmpStack = []
                    else:
                        tmpStack = []
                    if predictions_test[indexInBatch][j] != 0 and predictions_test[indexInBatch][j] == groundtruth_test[indexInBatch][j]:
                        curGoodNum += 1
                print('curGoodNum is ', curGoodNum)
                print('predict keyphrase is :', curPreKp)
                print('ground truth is :', curJsonData['keywords'].split(';'))
                print('=======================================================================================================================================')
                logFile.write('curGoodNum is %d \n' % curGoodNum)
                logFile.write('predict keyphrase is ' + str(curPreKp) + '\n')
                logFile.write('ground truth is :' + str(curJsonData['keywords'].split(';')) + '\n')
                logFile.write('=======================================================================================================================================\n')
                indexInBatch += 1
    logFile.close()
Example No. 18
def main(_):
    # Set the verbosity level for TensorFlow training logs
    tf.logging.set_verbosity(tf.logging.INFO)

    ########################################
    # Check directories and dataset paths  #
    ########################################
    prepare_training_dir()

    ########################
    # Dataset preparation  #
    ########################
    charset = read_charset(
        os.path.join(FLAGS.path_dataset_root,
                     DEFAULT_CONFIG['charset_filename']))
    print('chinese dict is as follows:')
    print(json.dumps(charset, ensure_ascii=False, encoding='UTF-8'))

    train_image_batch, train_label_batch, tfrecord_files = batch_input(
        'train', FLAGS.train_batch_size, len(charset), FLAGS.path_dataset_root,
        None)
    train_one_hot = slim.one_hot_encoding(train_label_batch, len(charset))
    # val_image_batch, val_label_batch = batch_input('val', FLAGS.val_batch_size, len(charset),
    #                                                FLAGS.path_dataset_root, None)

    ########################
    # Model construction   #
    ########################
    shape_img = DEFAULT_CONFIG['image_shape']
    max_sequence_length = DEFAULT_CONFIG['max_sequence_length']
    pl_image = tf.placeholder(
        tf.float32,
        shape=[None, shape_img[0], shape_img[1], shape_img[2]],
        name='pl_image')
    pl_label = tf.placeholder(tf.int64,
                              shape=[None, max_sequence_length],
                              name='pl_label')
    one_hot_label = slim.one_hot_encoding(pl_label, len(charset))
    ocr_model = model.Model(num_char_classes=len(charset),
                            seq_length=DEFAULT_CONFIG['max_sequence_length'],
                            num_views=DEFAULT_CONFIG['num_of_views'],
                            null_code=DEFAULT_CONFIG['null_code'],
                            mparams=create_mparams())
    endpoints = ocr_model.create_base(pl_image, one_hot_label)
    chars_logit = endpoints.chars_logit
    predicted_text = endpoints.predicted_text
    total_loss = ocr_model.create_loss_v2(pl_label, endpoints)

    ########################
    # Optimizer setup      #
    ########################
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False,
                                  dtype=tf.int32)
    optimizer = create_optimizer()
    grads = optimizer.compute_gradients(total_loss)
    train_op = optimizer.apply_gradients(grads, global_step=global_step)

    #################################
    # Configure and start training  #
    #################################
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Initialize weight parameters
    sess.run(tf.local_variables_initializer())
    step = _init_weight(sess)
    print('tfrecord files for training: {}'.format(sess.run(tfrecord_files)))
    # Thread coordinator for the input queue runners
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Total number of samples in the training set
    num_per_epoch = DEFAULT_CONFIG['splits']['train']['size']
    num_per_step = FLAGS.train_batch_size
    epoch = step * num_per_step // num_per_epoch
    while epoch < FLAGS.num_epochs:
        batch_img_train, batch_label_train = sess.run(
            [train_image_batch, train_label_batch])
        # Evaluate the model on the validation set
        ###########

        # Display training progress
        if step % FLAGS.display_step == 0:
            _ = sess.run(train_op,
                         feed_dict={
                             pl_image: batch_img_train,
                             pl_label: batch_label_train
                         })
            loss_train = sess.run(total_loss,
                                  feed_dict={
                                      pl_image: batch_img_train,
                                      pl_label: batch_label_train
                                  })
            print('epoch: {}, step: {}, train_loss: {}'.format(
                epoch, step, loss_train))
        else:
            _ = sess.run(train_op,
                         feed_dict={
                             pl_image: batch_img_train,
                             pl_label: batch_label_train
                         })
            # aa = sess.run(predicted_text, feed_dict={pl_image: batch_img_train, pl_label: batch_label_train})
            # bb = sess.run(chars_logit, feed_dict={pl_image: batch_img_train, pl_label: batch_label_train})

        # Save the model
        if step % FLAGS.save_step == 0:
            checkpoint_path = os.path.join(FLAGS.train_log_dir,
                                           'ocr_chinese_model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
            print('************save model at {} steps'.format(step))

        step += 1
        epoch = step * num_per_step // num_per_epoch

    coord.request_stop()
    coord.join(threads)
    sess.close()
Example No. 19
from train_OCR import create_mparams

FLAGS = tf.app.flags.FLAGS
"""使用ckpt文件评估模型"""

if __name__ == '__main__':
    path_img = './dataset_generate/data_sample/20455828_2605100732.jpg'
    path_ckpt = './train_logs/ocr_chinese_model.ckpt-1090000'

    charset = read_charset(
        os.path.join(FLAGS.path_dataset_root,
                     DEFAULT_CONFIG['charset_filename']))

    ocr_model = model.Model(num_char_classes=len(charset),
                            seq_length=DEFAULT_CONFIG['max_sequence_length'],
                            num_views=DEFAULT_CONFIG['num_of_views'],
                            null_code=DEFAULT_CONFIG['null_code'],
                            mparams=create_mparams(),
                            charset=charset)
    shape_img = DEFAULT_CONFIG['image_shape']
    max_sequence_length = DEFAULT_CONFIG['max_sequence_length']
    pl_image = tf.placeholder(
        tf.float32,
        shape=[None, shape_img[0], shape_img[1], shape_img[2]],
        name='pl_image')
    endpoints = ocr_model.create_base(pl_image, labels_one_hot=None)
    init_fn = ocr_model.create_init_fn_to_restore(path_ckpt)

    resize_height = DEFAULT_CONFIG['image_shape'][0]
    resize_width = DEFAULT_CONFIG['image_shape'][1]
    img = Image.open(path_img)
    img = np.array(img.resize((resize_width, resize_height), Image.ANTIALIAS),
Example No. 20
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints',
        'display_test_per': 5,
        'lr_decay_per': 10
    }

    # load the dataset
    train_set, test_set, dic, embedding = load.atisfold()
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)

    test_lex, test_y, test_z = test_set[0:1000]

    y_nclasses = 2
    z_nclasses = 5

    with tf.Session() as sess:

        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        print("测试结果�")
        predictions_test = []
        groundtruth_test = []
        for batch in tl.iterate.minibatches(test_lex,
                                            test_z,
                                            batch_size=s['batch_size']):
            x, z = batch
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)

        res_test = tools.conlleval(predictions_test, groundtruth_test, '')

        print(res_test)
Example No. 21
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 150,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints',
        'display_test_per': 3,
        'lr_decay_per': 10
    }

    train_set, test_set, dic, embedding = load.atisfold()

    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    train_lex, train_y, train_z = train_set

    tr = int(len(train_lex) * 0.9)
    valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]
    train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    test_lex, test_y, test_z = test_set

    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))

    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")

    y_nclasses = 2
    z_nclasses = 5

    nsentences = len(train_lex)

    # tf.reset_default_graph()
    print('#' * 30)
    with tf.Session() as sess:

        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
        print("*" * 30)

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob'],
                rnn.batch_size: s['batch_size']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None
        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            for step, batch in enumerate(
                    tl.iterate.minibatches(train_lex,
                                           list(zip(train_y, train_z)),
                                           batch_size=s['batch_size'])):
                input_x, target = batch
                label_y, label_z = zip(*target)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                # print(type(input_x))
                # print(type(s['win']))
                # print(input_x)
                # print(s['win'])
                cwords = tools.contextwin_2((input_x), s['win'])
                #print(s['batch_size'])
                loss = train_step(cwords, label_y, label_z)

                print(
                    'loss %.2f' % loss,
                    ' [learning] epoch %i>> %2.2f%%' %
                    (e, s['batch_size'] * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start),
                )

                sys.stdout.flush()

            #VALID

            predictions_valid = []
            predictions_test = []
            groundtruth_valid = []
            groundtruth_test = []
            for batch in tl.iterate.minibatches(valid_lex,
                                                valid_z,
                                                batch_size=s['batch_size']):
                x, z = batch
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions_valid.extend(dev_step(x))
                groundtruth_valid.extend(z)

            res_valid = tools.conlleval(predictions_valid, groundtruth_valid,
                                        '')

            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                path = saver.save(sess=sess,
                                  save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
            else:
                print('\nVALID new curr:', res_valid)

            #TEST
            if e % s['display_test_per'] == 0:
                for batch in tl.iterate.minibatches(
                        test_lex, test_z, batch_size=s['batch_size']):
                    x, z = batch
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_test.extend(dev_step(x))
                    groundtruth_test.extend(z)

                res_test = tools.conlleval(predictions_test, groundtruth_test,
                                           '')

                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                else:
                    print('TEST new curr:', res_test)

            # learning rate decay if no improvement in 10 epochs
            if e - best_e > s['lr_decay_per']:
                sess.run(fetches=rnn.learning_rate_decay_op)
            lr = sess.run(fetches=rnn.lr)
            print('learning rate:%f' % lr)
            if lr < 1e-5: break
            print()

        print("Train finished!")
        print('Valid Best Result: epoch %d:  ' % (best_e), best_res)
        print('Test Best Result: epoch %d:  ' % (test_best_e), test_best_res)
Example No. 22
from models import model
from handlers import config_pattern


class Application(web.Application):
    def __init__(self, model):

        self.model = model

        #config setting from options
        settings = dict(template_path=os.path.join(os.path.dirname(__file__),
                                                   "templates"),
                        cookie_secret=options.cookie_secret,
                        static_path=os.path.join(os.path.dirname(__file__),
                                                 "static"))

        #setup DB
        # self.db = torndb.Connection("%s:%s" % (options.mysql["host"], options.mysql["port"]), options.mysql["database"], user=options.mysql["user"], password=options.mysql["password"], charset='utf8')

        #handlersPattern
        handlers = config_pattern.handlersPattern
        super(Application, self).__init__(handlers, **settings)


if __name__ == '__main__':
    myOptions.parse_options()
    app = Application(model.Model())
    app.listen(options.port)

    IOLoop.instance().start()
Example No. 23
    def resume(self, url, image_recognition):
        m = model.Model()
        m.process_website(url, True, image_recognition)
Example No. 24
    def start(self, url, image_recognition):
        m = model.Model()
        m.process_website(url, False, image_recognition)
Example No. 25
        collate_fn=dataset.alignCollate())

if args.resume is not None:
    print('loading pretrained class from {}'.format(args.resume))
    checkpoint = torch.load(args.resume,
                            map_location=lambda storage, loc: storage)
    args.alphabet = checkpoint['alphabet']
    del checkpoint
else:
    args.alphabet = util.get_vocab(root=args.root, label=args.train_label)

args.num_class = len(args.alphabet) + 1
converter = convert.strLabelConverter(args.alphabet)

model = model.Model(num_classes=args.num_class,
                    fixed_height=args.height,
                    net=args.net)
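# Note: the dcrnn.Model assignment on the next line replaces the model.Model instance created above.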
model = dcrnn.Model(n_classes=args.num_class, fixed_height=args.height)
optimizer = optim.Adam(model.parameters(),
                       lr=args.learning_rate,
                       betas=(0.5, 0.999))

if args.resume is not None:
    print('loading pretrained model from {}'.format(args.resume))
    checkpoint = torch.load(args.resume,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    del checkpoint
criterion = CTCLoss()

global image, text, length
Example No. 26
    #         if lst1[i] and lst2[i]: #same and 1
    #             count+=1
    #         elif (not lst1[i] and lst2[i]) or (lst1[i] and not lst2[i]): #one of them has 1 and diff
    #             diff+=1
    #     return count/diff
    def jaccard(lst1, lst2):
        lst1 = np.asarray(lst1)
        lst2 = np.asarray(lst2)
        return np.double(np.bitwise_and(lst1, lst2).sum()) / np.double(
            np.bitwise_or(lst1, lst2).sum())
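
A tiny illustrative use of the jaccard helper above, with made-up binary vectors (intersection 2, union 4, so the similarity is 0.5):

import numpy as np
a = np.asarray([1, 0, 1, 1])
b = np.asarray([1, 1, 0, 1])
print(np.double(np.bitwise_and(a, b).sum()) / np.double(np.bitwise_or(a, b).sum()))  # 0.5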


if __name__ == '__main__':
    reuterdicpath = os.path.dirname(os.path.dirname(
        os.path.realpath(__file__))) + "\\dictionaryBuilding\\reutersdic.json"
    with open(reuterdicpath, 'r') as f:
        dic = json.load(f)
        inverted_index = model.Model('vsm').buildIndex(dic)
        print(len(inverted_index))
        expander = QueryExpansion(dic, inverted_index)
    # with open('unique_words.json', 'w')as f:
    #     words=expander.unique_words()
    #     json.dump(words,f)
    # with open('docvec.json', 'w') as f:
    #     json.dump(expander.get_doc_vector(), f)
    # expander.build_thesaurus()
    # print(expander.build_thesaurus())
    with open('reuters_theaurus.json', 'w') as f:
        #
        json.dump(expander.build_thesaurus(), f)
Example No. 27
    def test_get_client(self):
        ds = model.Model().get_client()
        query = ds.query(kind='courses')
        result = list(query.fetch())
        assert 1 < 2
Example No. 28
import copy
# from tqdm import tqdm

config = DefaultConfig()

if config.use_hyperboard:
    from hyperboard import Agent
    agent = Agent(username='******', password='******', port=5005)
    parameter = config.todict()
    validate_loss_record = agent.register(parameter, 'loss', overwrite=True)

train_dataset = dataset.MyDataset()
validate_dataset = dataset.MyDataset()

criticer = torch.nn.MSELoss()
model = model.Model()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
if config.gpu >= 0:
    model.cuda(config.gpu)

max_loss = 0
no_gain = 0
global_step = 0
train_num = len(train_dataset)

model.train()
for epoch in range(config.epoch_num):
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True)
Example No. 29
def create_model(config):
    net = model.Model(config.n_classes,
                      feature_extractor=config.feature_extractor,
                      metric_learning=config.metric_learning)
    return net
Example No. 30
def main():
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.01,
        'lr_decay': 0.5,  #
        'max_grad_norm': 5,  #
        'seed': 345,  #
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints/GZ_EMNLP2016/kp20k_0.01_16',
        'display_test_per': 1,  #
        'lr_decay_per': 5  #
    }

    # data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'
    #data_set_file = 'data/ACL2017/inspec/inspec_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/inspec/inspec_t_a_GZ_embedding.pkl'
    # data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    # emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'
    #data_set_file = 'data/ACL2017/nus/nus_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/nus/nus_t_a_GZ_embedding.pkl'
    #data_set_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_data_set.pkl'
    #emb_file = 'data/ACL2017/krapivin/krapivin_t_a_GZ_embedding.pkl'
    print('loading dataset.....')
    # train_set,test_set,dic,embedding = load.atisfold(data_set_file, emb_file)
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # idx2label = dict((k,v) for v,k in dic['labels2idx'].iteritems())
    # idx2word  = dict((k,v) for v,k in dic['words2idx'].iteritems())

    train_lex, train_y, train_z = train_set
    # train_lex: [[word indices for each tweet], ...]; train_y: [[1 at keyword positions]]; train_z: [[keyword positions labeled 0-4 (begin, end, ...)]]
    # tr = int(len(train_lex)*0.9)
    # valid_lex, valid_y, valid_z = train_lex[tr:], train_y[tr:], train_z[tr:]
    # train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    # test_lex,  test_y, test_z  = test_set
    valid_lex, valid_y, valid_z = valid_set
    test_lex, test_y, test_z = test_set
    log_dir = s['check_dir']
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logfile = open(str(s['check_dir']) + '/log.txt',
                   'a',
                   encoding='utf-8',
                   buffering=1)
    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))
    logfile.write('len(train_data) {}\n'.format(len(train_lex)))
    logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
    logfile.write('len(test_data) {}\n'.format(len(test_lex)))
    vocab = set(dic['words2idx'].keys())
    vocsize = len(vocab)
    print('len(vocab) {}'.format(vocsize))
    print("Train started!")
    logfile.write('len(vocab) {}\n'.format(vocsize))
    logfile.write("Train started!\n")
    y_nclasses = 2
    z_nclasses = 5

    nsentences = len(train_lex)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
        allow_soft_placement=True)  ###########################################
    with tf.compat.v1.Session(
            config=config) as sess:  #####################################
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=s['batch_size'],
                          model_cell='lstm')
        #     my_model = mymodel.myModel(
        #         # nh1=s['nh1'],
        #         # nh2=s['nh2'],
        #         # ny=y_nclasses,
        #         # nz=z_nclasses,
        #         de=s['emb_dimension'],
        #         lr=s['lr'],
        #         lr_decay=s['lr_decay'],
        #         embedding=embedding,
        #         max_gradient_norm=s['max_grad_norm'],
        #         keep_prob=s['keep_prob'],
        #         model_cell='lstm'
        #     )

        # Save model checkpoints
        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

        def train_step(cwords, label_y, label_z):
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob']
                # rnn.batch_size:s['batch_size']
            }
            fetches = [rnn.loss, rnn.train_op]
            loss, _ = sess.run(fetches=fetches, feed_dict=feed)
            # _,Loss = sess.run(fetches=fetches, feed_dict=feed)
            return loss

        def dev_step(cwords):
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0
                # rnn.keep_prob:1.0,
                # rnn.batch_size:s['batch_size']
            }
            fetches = rnn.sz_pred
            sz_pred = sess.run(fetches=fetches, feed_dict=feed)
            return sz_pred

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None
        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            start_num = 0
            # for step,batch in enumerate(tl.iterate.minibatches(train_lex,list(zip(train_y,train_z)),batch_size=s['batch_size'])):
            # for step, batch in enumerate(batch_putin(train_lex, list(zip(train_y, train_z)), start_num=start_num, batch_size=s['batch_size'])):
            steps = len(train_lex) // s['batch_size']
            for step in range(steps):
                # batch = batch_putin(train_lex,list(zip(train_y, train_z)), start_num=start_num, batch_size=s['batch_size'])
                # input_x,target=batch
                # label_y,label_z=list(zip(*target))
                input_x, label_y, label_z = train_batch_putin(
                    train_lex,
                    train_y,
                    train_z,
                    start_num=start_num,
                    batch_size=s['batch_size'])
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                # cwords = input_x
                loss = train_step(cwords, label_y, label_z)
                start_num += s['batch_size']
                print(
                    'loss %.6f' % loss, ' [learning] epoch %i>> %2.2f%%' %
                    (e, s['batch_size'] * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(
                        ' [learning] epoch %i>> %2.2f%%' %
                        (e, s['batch_size'] * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))
                # sys.stdout.flush())

            #VALID
            if e >= 0:
                print('Validing..............')
                predictions_valid = []
                predictions_test = []
                groundtruth_valid = []
                groundtruth_test = []
                start_num = 0
                steps = len(valid_lex) // s['batch_size']
                # for batch in  tl.iterate.minibatches(valid_lex,valid_z,batch_size=s['batch_size']):
                for step in range(steps):
                    # batch = batch_putin(valid_lex, valid_z, start_num=start_num, batch_size=s['batch_size'])
                    # x,z=batch
                    x, z = test_batch_putin(valid_lex,
                                            valid_z,
                                            start_num=start_num,
                                            batch_size=s['batch_size'])
                    x = load.pad_sentences(x)
                    x = tools.contextwin_2(x, s['win'])
                    predictions_valid.extend(dev_step(x))
                    groundtruth_valid.extend(z)
                    start_num += s['batch_size']

                res_valid = tools.conlleval(predictions_valid,
                                            groundtruth_valid)
                del predictions_valid
                del groundtruth_valid
                if res_valid['f'] > best_f:
                    best_f = res_valid['f']
                    best_e = e
                    best_res = res_valid
                    print('\nVALID new best:', res_valid)
                    logfile.write('\nVALID new best: ' + str(res_valid))
                    path = saver.save(sess=sess,
                                      save_path=checkpoint_prefix,
                                      global_step=e)
                    print("Save model checkpoint to {}".format(path))
                    logfile.write(
                        "\nSave model checkpoint to {}\n".format(path))
                else:
                    print('\nVALID new curr:', res_valid)
                    logfile.write('\nVALID new curr: ' + str(res_valid))

                #TEST
                print('Testing..............')
                start_num = 0
                steps = len(test_lex) // s['batch_size']
                if e % s['display_test_per'] == 0:
                    # for batch in tl.iterate.minibatches(test_lex, test_z, batch_size=s['batch_size']):
                    for step in range(steps):
                        # batch = batch_putin(test_lex, test_z, start_num=start_num, batch_size=s['batch_size'])
                        # x,z = batch
                        x, z = test_batch_putin(test_lex,
                                                test_z,
                                                start_num=start_num,
                                                batch_size=s['batch_size'])
                        x = load.pad_sentences(x)
                        x = tools.contextwin_2(x, s['win'])
                        predictions_test.extend(dev_step(x))
                        groundtruth_test.extend(z)
                        start_num += s['batch_size']

                    res_test = tools.conlleval(predictions_test,
                                               groundtruth_test)

                    if res_test['f'] > test_best_f:
                        test_best_f = res_test['f']
                        test_best_e = e
                        test_best_res = res_test
                        print('TEST new best:', res_test)
                        logfile.write('\nTEST new best: ' + str(res_test))
                    else:
                        print('TEST new curr:', res_test)
                        logfile.write('\nTEST new curr: ' + str(res_test))

                # learning rate decay if no improvement in 10 epochs
                if e - best_e > s['lr_decay_per']:
                    sess.run(fetches=rnn.learning_rate_decay_op)
                lr = sess.run(fetches=rnn.lr)
                print('learning rate:%f' % lr)
                logfile.write('\nlearning rate:%f\n' % lr)
                if lr < 1e-6: break

        print("Train finished!")
        print('Valid Best Result: epoch %d:  ' % (best_e), best_res)
        print('Test Best Result: epoch %d:  ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d:   ' % (best_e) +
                      str(best_res))
        logfile.write('\nTest Best Result: epoch %d:   ' % (test_best_e) +
                      str(test_best_res))
        logfile.close()