Example #1
0
	def __init__(self, mode = 'chat'):
		"""Make sure the processed data and checkpoint folders exist,
		then enter chat mode when requested (the default)."""
		processed_exists = os.path.isdir(config.PROCESSED_PATH)
		if not processed_exists:
			# First run: build the processed corpus from the raw data.
			data.prepare_raw_data()
			data.process_data()

		# Checkpoint directory must be present before training/chatting.
		data.make_dir(config.CPT_PATH)

		if mode == "chat":
			self.__chat_init()
Example #2
0
def write_results(summaries, train_loss, losses, filepath, gt_summaries_path):
    """Write evaluation results (losses and generated summaries) to file.

    Args:
        summaries: list of generated headline strings.
        train_loss: average training loss (number), written as a header.
        losses: iterable of per-bucket loss records, one per line.
        filepath: directory in which the 'log' file is written.
        gt_summaries_path: path to ground-truth summaries
            (currently unused; kept for interface compatibility).
    """
    # Directory for generated summaries; created even though this function
    # only writes the log file -- callers may rely on it existing.
    dir_path = os.path.join(filepath, 'generated_summaries')
    data.make_dir(dir_path)
    with open(os.path.join(filepath, 'log'), 'w') as f:
        # BUG FIX: terminate the header line so the first loss entry
        # does not run into it on the same line.
        f.write('Avg Train loss: ' + str(train_loss) + '\n')
        for loss in losses:
            f.write(str(loss) + '\n')
        # Same output as the original two consecutive '\n' * 3 writes.
        f.write('\n' * 6)
        for i, generated in enumerate(summaries):
            f.write(str(i) + ' Generated headline: ' + generated + '\n\n')
Example #3
0
def main():
    """Prepare the processed data set and the checkpoints directory."""
    # (Dead commented-out argparse code removed; this entry point takes
    # no command-line options.)
    if not os.path.isdir(config.PROCESSED_PATH):
        data.prepare_raw_data()
        data.process_data()
    print('Data ready!')
    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)
Example #4
0
def main(args):
    """Entry point: prepare data, then train or test depending on args.

    Args:
        args: command-line argument list; the last element selects the
            mode ('train' or 'test').

    Raises:
        SystemExit: if the last argument is not a recognized mode.
    """
    if not os.path.isdir(config.PROCESSED_PATH):
        data.process_data()
    print('Data is ready!')

    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)

    mode = args[-1]

    if mode == 'train':
        train()
    elif mode == 'test':
        predict()
    else:
        # Previously an unknown mode silently did nothing; fail loudly instead.
        raise SystemExit("Unknown mode %r: expected 'train' or 'test'" % mode)
def main():
    """Parse --mode, make sure the data and checkpoint dirs exist, then run."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices={'train', 'chat'},
                        default='train', help="mode. if not specified, it's in the train mode")
    args = parser.parse_args()

    data_ready = os.path.isdir(config.PROCESSED_PATH)
    if not data_ready:
        data.prepare_raw_data()
        data.process_data()
    print('Data ready!')

    # Checkpoint folder must exist before training/chatting starts.
    data.make_dir(config.CPT_PATH)

    # Dispatch on the requested mode (argparse restricts it to these two).
    actions = {'train': train, 'chat': chat}
    selected = actions.get(args.mode)
    if selected is not None:
        selected()
Example #6
0
def main():
    """Command-line entry point: prepare the data, then train or chat."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode',choices={'train','chat'},default='train',help="mode if not specified its in train mode")

    args = parser.parse_args()

    # Build the processed data set on the first run only.
    if not os.path.isdir(config.PROCESSED_PATH):
        data.prepare_raw_data()
        data.process_data()
    print('Data Ready!')

    # Checkpoints folder must exist before either mode starts.
    data.make_dir(config.CPT_PATH)

    run_mode = args.mode
    if run_mode == 'chat':
        chat()
    elif run_mode == 'train':
        train()
Example #7
0
def main():
    """Parse arguments, ensure data/checkpoints exist, and launch the app."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices={'train', 'chat'},
                        default='train', help="mode. if not specified, it's in the train mode")

    cli_args = parser.parse_args()

    if not os.path.isdir(config.PROCESSED_PATH):
        # First run: build the processed corpus from the raw data.
        data.prepare_raw_data()
        data.process_data()

    print("Data ready, starting application")
    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)

    # argparse restricts --mode to exactly these two values.
    if cli_args.mode == 'chat':
        chat()
    elif cli_args.mode == 'train':
        train()
Example #8
0
def main():
    """Parse the mode argument and run training, testing, or translation."""
    parser = argparse.ArgumentParser()
    # BUG FIX: nargs='?' is required for a positional argument's default to
    # take effect; without it argparse treats 'mode' as mandatory, so the
    # advertised "train mode by default" behavior never happened.
    parser.add_argument('mode',
                        nargs='?',
                        choices={'train', 'test', 'translate'},
                        default='train',
                        help="mode. if not specified, it's in the train mode")
    args = parser.parse_args()

    if not os.path.isdir(config.PROCESSED_PATH):
        data.prepare_raw_data()
        data.process_data()
    print('Data ready!')
    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)

    if args.mode == 'train':
        train()
    elif args.mode == 'test':
        # test() returns BLEU scores; they were bound but never used here.
        test()
    elif args.mode == 'translate':
        translate()
Example #9
0
def main():
    """Prepare the data set, then ask the user for a mode and run it."""
    # (Dead argparse code that was "commented out" inside string literals
    # has been removed; mode selection is interactive via input().)
    if not os.path.isdir(config.PROCESSED_PATH):
        data.prepare_raw_data()
        data.process_data()
    print('Data ready!')
    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)
    mode = input("Input mode (train|chat): ")
    if mode == 'train':
        train()
    else:
        # Anything other than 'train' (including typos) falls through to chat.
        chat()
Example #10
0
def start_training():
    """Build the seq2seq model and run the training loop until Ctrl-C.

    Prepares the processed data set on first run, restores any existing
    checkpoint, then trains indefinitely: every `skip_step` iterations the
    loss is logged and a checkpoint is saved; every 10 * skip_step
    iterations the dev buckets are evaluated.  Progress is appended to the
    training/testing record files under config.PROCESSED_PATH.
    """
    if not os.path.isdir(config.PROCESSED_PATH):
        data.prepare_raw_data()
        data.process_data()
    print('Data ready!')

    # create checkpoints folder if there isn't one already
    data.make_dir(config.CPT_PATH)
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()
    # in train mode, we need to create the backward path, so forward_only is False
    model = Seq2SeqModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        total_loss = 0
        print('Start training ...')
        # Use context managers so the record files are closed even when an
        # unexpected exception (not just KeyboardInterrupt) escapes the loop.
        with open(os.path.join(config.PROCESSED_PATH,
                               config.TRAINING_RECORD_FILE), 'a+') as train_record_file, \
             open(os.path.join(config.PROCESSED_PATH,
                               config.TESTING_RECORD_FILE), 'a+') as test_record_file:
            # Infinite loop; exits only via the KeyboardInterrupt handler.
            while True:
                try:
                    skip_step = _get_skip_step(iteration)
                    bucket_id = _get_random_bucket(train_buckets_scale)
                    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                        data_buckets[bucket_id],
                        bucket_id,
                        batch_size=config.BATCH_SIZE)
                    start = time.time()
                    _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                               decoder_inputs, decoder_masks,
                                               bucket_id, False)
                    total_loss += step_loss
                    iteration += 1

                    if iteration % skip_step == 0:
                        _train_info = 'Iter {}: loss {}, time {}'.format(
                            iteration, total_loss / skip_step,
                            time.time() - start)
                        print(_train_info)
                        train_record_file.write(_train_info + '\n')
                        start = time.time()
                        total_loss = 0
                        saver.save(sess,
                                   os.path.join(config.CPT_PATH, 'chatbot'),
                                   global_step=model.global_step)
                        if iteration % (10 * skip_step) == 0:
                            # Run evals on development set and print their loss
                            _test_info = _eval_test_set(sess, model, test_buckets)
                            for item in _test_info:
                                print(item)
                                test_record_file.write("%s\n" % item)
                            start = time.time()
                        sys.stdout.flush()
                except KeyboardInterrupt:
                    print('Interrupted by user at iteration {}'.format(iteration))
                    # BUG FIX: the original closed the record files here but
                    # kept looping, so the next write hit a closed file.
                    break
Example #11
0
def make_dirs(name):
    """Create <name>_summaries with 'reference' and 'system' subfolders
    under the module-level `path`, and return the reference folder path.
    """
    # Build each level with os.path.join instead of embedding '/' in a
    # component, so the separators are correct on every platform.
    base_path = os.path.join(path, name + '_summaries')
    data.make_dir(base_path)
    summary_path = os.path.join(base_path, 'reference')
    data.make_dir(summary_path)
    data.make_dir(os.path.join(base_path, 'system'))
    return summary_path
Example #12
0
 def _setup_results(self):
     '''Initialize path to results.

     Sets self.results_path to <sess_dir>/results and creates that
     directory via data.make_dir.  (Python 2 code: print statement.)
     '''
     print 'Setting up results directory'
     self.results_path = os.path.join(self.sess_dir, 'results')
     data.make_dir(self.results_path)
Example #13
0
 def _setup_checkpoints(self):
     '''Initialize path to checkpoints.

     Sets self.checkpoint_path to <sess_dir>/checkpoint and creates that
     directory via data.make_dir.  (Python 2 code: print statement.)
     '''
     print 'Setting up checkpoints directory'
     self.checkpoint_path = os.path.join(self.sess_dir, 'checkpoint')
     data.make_dir(self.checkpoint_path)
Example #14
0
 def _setup_sess_dir(self):
     '''Set up session directory.  All outputs from the program go here.

     Sets self.sess_dir to the session name (joining with '' leaves the
     name unchanged -- presumably a placeholder for a base directory)
     and creates the directory via data.make_dir.
     '''
     print 'Setting up directory for session'
     self.sess_dir = os.path.join('', self.sess_name)
     data.make_dir(self.sess_dir)
Example #15
0
    def evaluate(self, sess, train_losses, iteration, test=False):
        '''  Evaluate the model's current state on dev or test set.

        sess         -- active session used to run the model
        train_losses -- recent training losses; their mean is logged
        iteration    -- current training iteration; names the results dir
        test         -- True evaluates self.test_data, False self.dev_data

        Records bucket losses in self.dev_loss and writes summaries and
        losses to a per-iteration results directory via utils.write_results.
        '''
        bucket_loss_texts = []
        bucket_losses = []
        summaries = []
        eval_iter = 0
        # Mean of the recent training losses, reported alongside eval results.
        avg_loss = np.sum(train_losses) / len(train_losses)
        print 'Average train loss', avg_loss
        eval_start = time.time()

        for bucket_index in xrange(len(config.BUCKETS)):
            # NOTE(review): emptiness is checked against self.test_data even
            # when evaluating the dev set (test=False) -- confirm intended.
            bucket_count = len(self.test_data[bucket_index]['enc_input'])
            if bucket_count == 0:
                print 'Test: empty bucket', bucket_index
                continue
            bucket_loss = []
            while True:
                # get data
                eval_data = self.test_data if test else self.dev_data
                batch_data = data.get_batch(eval_data, bucket_index,
                                            config.BUCKETS, config.BATCH_SIZE,
                                            eval_iter)
                encoder_inputs = batch_data[0]
                decoder_inputs = batch_data[1]
                decoder_masks = batch_data[2]
                # `done` flags that the bucket has been exhausted.
                done = batch_data[3]
                # run model
                _, step_loss, output_logits = self.run_step(
                    sess, encoder_inputs, decoder_inputs, decoder_masks,
                    config.BATCH_SIZE, bucket_index, False)
                bucket_loss.append(step_loss)
                output_logits = np.array(output_logits)
                # get summaries: decode one generated sequence per batch row
                for i in xrange(config.BATCH_SIZE):
                    summaries.append(
                        self._construct_seq(output_logits[:, i, :]))
                eval_iter += 1
                if done:
                    # Reset the batch cursor for the next bucket.
                    eval_iter = 0
                    break

            # loss_text is a tuple (comma expression), not a formatted
            # string; it is printed and logged as-is.
            loss_text = 'Test bucket:', bucket_index, 'Avg Loss:', \
                (sum(bucket_loss) / len(bucket_loss))
            print loss_text
            bucket_losses.append(sum(bucket_loss))
            bucket_loss_texts.append(loss_text)

        path = os.path.join(self.results_path, 'iter_' + str(iteration))
        # adjust learning rate if dev loss increases
        # if self.dev_loss is not None:
        #     if sum([self.dev_loss[i] < bucket_losses[i]
        #             for i in range(len(bucket_losses))]) > 0:
        #         self.lr /= 2
        #         print 'Learning rate adjusted'
        # Remember the latest per-bucket losses (used by the disabled
        # learning-rate adjustment above).
        self.dev_loss = bucket_losses

        # log results
        if test:
            path += '_test'
        data.make_dir(path)
        gt_path = self.test_headlines_path if test else self.dev_headlines_path
        utils.write_results(summaries, avg_loss, bucket_loss_texts, path,
                            gt_path)
        print 'Wrote results to', path
        print 'Evaluation took', time.time() - eval_start