Example #1
def main(unused_argv):
    if not FLAGS.data_dir:
        raise ValueError("--data_dir is required.")

    encoder = encoder_manager.EncoderManager()

    # Maybe load unidirectional encoder.
    if FLAGS.uni_checkpoint_path:
        print("Loading unidirectional model...")
        uni_config = configuration.model_config()
        encoder.load_model(
            uni_config, FLAGS.uni_vocab_file,
            FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)

    # Maybe load bidirectional encoder.
    if FLAGS.bi_checkpoint_path:
        print("Loading bidirectional model...")
        bi_config = configuration.model_config(bidirectional_encoder=True)
        encoder.load_model(
            bi_config, FLAGS.bi_vocab_file, FLAGS.bi_embeddings_file,
            FLAGS.bi_checkpoint_path)

    if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
        eval_classification.eval_nested_kfold(
            encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
    elif FLAGS.eval_task == "SICK":
        eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
    elif FLAGS.eval_task == "MSRP":
        eval_msrp.evaluate(
            encoder, evalcv=True, evaltest=True, use_feats=True,
            loc=FLAGS.data_dir)
    elif FLAGS.eval_task == "TREC":
        eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
    else:
        raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

    encoder.close()
Example #2
def main(unused_argv):
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")

  encoder = encoder_manager.EncoderManager()

  # Maybe load unidirectional encoder.
  if FLAGS.uni_checkpoint_path:
    print("Loading unidirectional model...")
    uni_config = configuration.model_config()
    encoder.load_model(uni_config, FLAGS.uni_vocab_file,
                       FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)

  # Maybe load bidirectional encoder.
  if FLAGS.bi_checkpoint_path:
    print("Loading bidirectional model...")
    bi_config = configuration.model_config(bidirectional_encoder=True)
    encoder.load_model(bi_config, FLAGS.bi_vocab_file, FLAGS.bi_embeddings_file,
                       FLAGS.bi_checkpoint_path)

  if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
    eval_classification.eval_nested_kfold(
        encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
  elif FLAGS.eval_task == "SICK":
    eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "MSRP":
    eval_msrp.evaluate(
        encoder, evalcv=True, evaltest=True, use_feats=True, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  else:
    raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

  encoder.close()
Example #3
def main(unused_argv):
    if not FLAGS.data_dir:
        raise ValueError("--data_dir is required.")
    if not FLAGS.output_dir:
        raise ValueError("--output_dir is required.")

    encoder = encoder_manager.EncoderManager()

    # Maybe load unidirectional encoder.
    if FLAGS.uni_checkpoint_path:
        print("Loading unidirectional model...")
        uni_config = configuration.model_config()
        encoder.load_model(uni_config, FLAGS.uni_vocab_file,
                           FLAGS.uni_embeddings_file,
                           FLAGS.uni_checkpoint_path)

    # Maybe load bidirectional encoder.
    if FLAGS.bi_checkpoint_path:
        print("Loading bidirectional model...")
        bi_config = configuration.model_config(bidirectional_encoder=True)
        encoder.load_model(bi_config, FLAGS.bi_vocab_file,
                           FLAGS.bi_embeddings_file, FLAGS.bi_checkpoint_path)

    evaluate(encoder, FLAGS.output_dir, evaltest=True, loc=FLAGS.data_dir)

    encoder.close()
Example #4
def extract_by_skip_thought(sent_list: List[str]):
    """
    To make it compatible with the toolkit, we need the input to be a list of sentences
    :param sent_list:
    :return:
    """
    skip_thought_dir = os.path.join('/home/junpeiz/Project/Twitter/data',
                                    'skipThoughts', 'pretrained',
                                    'skip_thoughts_uni_2017_02_02')
    # Set paths to the model.
    VOCAB_FILE = os.path.join(skip_thought_dir, "vocab.txt")
    EMBEDDING_MATRIX_FILE = os.path.join(skip_thought_dir, "embeddings.npy")
    CHECKPOINT_PATH = os.path.join(skip_thought_dir, "model.ckpt-501424")
    # The following directory should contain files rt-polarity.neg and
    # rt-polarity.pos.
    # MR_DATA_DIR = "/dir/containing/mr/data"

    # Set up the encoder. Here we are using a single unidirectional model.
    # To use a bidirectional model as well, call load_model() again with
    # configuration.model_config(bidirectional_encoder=True) and paths to the
    # bidirectional model's files. The encoder will use the concatenation of
    # all loaded models.
    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(),
                       vocabulary_file=VOCAB_FILE,
                       embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                       checkpoint_path=CHECKPOINT_PATH)

    encoding_list = encoder.encode(sent_list)
    return encoding_list
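A minimal usage sketch for the function above (hedged: it assumes the pretrained
unidirectional checkpoint exists at the hard-coded path, and that each returned
entry is a fixed-length numpy vector, 2400-d for the released uni model):

sentences = ["the movie was great .", "the plot felt predictable ."]
vectors = extract_by_skip_thought(sentences)
print(len(vectors))  # one encoding per input sentence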
Example #5
def get_encoder():
    # Download and extract the bidirectional model. (shell script)
    # cd models/
    # wget "http://download.tensorflow.org/models/skip_thoughts_bi_2017_02_16.tar.gz"
    # tar -xvf skip_thoughts_bi_2017_02_16.tar.gz
    # rm skip_thoughts_bi_2017_02_16.tar.gz
    # cd ..
    #
    # Set paths to the model.
    pretrained_path = 'models/skip_thoughts_bi_2017_02_16/'
    VOCAB_FILE = os.path.join(pretrained_path, 'vocab.txt')
    EMBEDDING_MATRIX_FILE = os.path.join(pretrained_path, 'embeddings.npy')
    CHECKPOINT_PATH = os.path.join(pretrained_path, 'model.ckpt-500008')

    # Set up the encoder. Here we are using a single bidirectional model.
    # To use a unidirectional model as well, call load_model() again with
    # configuration.model_config() and paths to the unidirectional model's
    # files. The encoder will use the concatenation of all loaded models.
    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(bidirectional_encoder=True),
                       vocabulary_file=VOCAB_FILE,
                       embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                       checkpoint_path=CHECKPOINT_PATH)

    return encoder
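The comment block above describes stacking several models in one EncoderManager.
A hedged sketch of that combined setup, assuming both pretrained checkpoints have
been downloaded and that the directory names below are placeholders:

import os

from skip_thoughts import configuration
from skip_thoughts import encoder_manager

UNI_DIR = 'models/skip_thoughts_uni_2017_02_02/'  # hypothetical local path
BI_DIR = 'models/skip_thoughts_bi_2017_02_16/'    # hypothetical local path

encoder = encoder_manager.EncoderManager()
# Load the unidirectional model first.
encoder.load_model(configuration.model_config(),
                   vocabulary_file=os.path.join(UNI_DIR, 'vocab.txt'),
                   embedding_matrix_file=os.path.join(UNI_DIR, 'embeddings.npy'),
                   checkpoint_path=os.path.join(UNI_DIR, 'model.ckpt-501424'))
# Then the bidirectional model; encode() concatenates the outputs of all loaded models.
encoder.load_model(configuration.model_config(bidirectional_encoder=True),
                   vocabulary_file=os.path.join(BI_DIR, 'vocab.txt'),
                   embedding_matrix_file=os.path.join(BI_DIR, 'embeddings.npy'),
                   checkpoint_path=os.path.join(BI_DIR, 'model.ckpt-500008'))

encodings = encoder.encode(["an example sentence ."])
encoder.close()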
Example #6
    def __init__(self, use_char=False):
        super(SkipThought, self).__init__()
        self.use_char2vec = use_char

        cur_path = os.path.abspath(os.path.dirname(__file__))
        # Set paths to the model.
        VOCAB_FILE = os.path.join(
            cur_path, "../../models/skip_thoughts_uni_2017_02_02/vocab.txt")
        EMBEDDING_MATRIX_FILE = os.path.join(
            cur_path,
            "../../models/skip_thoughts_uni_2017_02_02/embeddings.txt")
        CHECKPOINT_PATH = os.path.join(
            cur_path,
            "../../models/skip_thoughts_uni_2017_02_02/model.ckpt-501424")

        self.encoder = encoder_manager.EncoderManager()
        self.encoder.load_model(configuration.model_config(),
                                vocabulary_file=VOCAB_FILE,
                                embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                                checkpoint_path=CHECKPOINT_PATH)

        if self.use_char2vec:
            PROJ_MODEL_PATH = os.path.join(
                cur_path,
                "../../models/char_word2vec/skip-thought_linear_projection.m")
            self.char_w2v = CharWord2vec()
            with open(PROJ_MODEL_PATH, 'rb') as f:
                self.proj = pickle.load(f)
Example #7
def load_model(vocab_file, embedding_matrix_file, checkpoint_path,
               bidirectional_encoder):
    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(
        bidirectional_encoder=bidirectional_encoder),
                       vocabulary_file=vocab_file,
                       embedding_matrix_file=embedding_matrix_file,
                       checkpoint_path=checkpoint_path)
    return encoder
Example #8
def setup_encoder():
    VOCAB_FILE = '/data/ryli/kcli/skip-thoughts/pretrained/skip_thoughts_uni_2017_02_02/vocab.txt'
    EMBEDDING_MATRIX_FILE = '/data/ryli/kcli/skip-thoughts/pretrained/skip_thoughts_uni_2017_02_02/embeddings.npy'
    CHECKPOINT_PATH = '/data/ryli/kcli/skip-thoughts/pretrained/skip_thoughts_uni_2017_02_02/model.ckpt-501424'

    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(),
                       vocabulary_file=VOCAB_FILE,
                       embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                       checkpoint_path=CHECKPOINT_PATH)

    return encoder
Example #9
def main():

    parser = argparse.ArgumentParser(
        description="encoding sentences example for skip_thoughts.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('vocab_file', help="specify the vocab_file")
    parser.add_argument('embedding_matrix_file',
                        help='specify the embedding_matrix_file')
    parser.add_argument('checkpoint_path', help="specify the checkpoint_path")
    parser.add_argument('mr_data_dir', help="specify the mr_data_dir")
    parser.add_argument('--model_name', default="skip_thoughts")
    parser.add_argument('--bidirect',
                        choices=["True", "False"],
                        default="False")

    args = parser.parse_args()

    if args.bidirect == "True":
        args.bidirect = True
    else:
        args.bidirect = False

    encoder = encoder_manager.EncoderManager(args.model_name)
    encoder.load_model(
        configuration.model_config(bidirectional_encoder=args.bidirect),
        vocabulary_file=args.vocab_file,
        embedding_matrix_file=args.embedding_matrix_file,
        checkpoint_path=args.checkpoint_path)

    data = []
    with open(os.path.join(args.mr_data_dir, 'rt-polarity.neg'), 'rb') as f:
        data.extend([line.decode('latin-1').strip() for line in f])
    with open(os.path.join(args.mr_data_dir, 'rt-polarity.pos'), 'rb') as f:
        data.extend([line.decode('latin-1').strip() for line in f])

    encodings = encoder.encode(data)

    def get_nn(ind, num=10):
        encoding = encodings[ind]
        scores = sd.cdist([encoding], encodings, 'cosine')[0]
        sorted_ids = np.argsort(scores)
        print("Senetence:")
        print("", data[ind])
        print("\nNearest neighbors:")
        for i in range(1, num + 1):
            print(" %d. %s (%.3f)" %
                  (i, data[sorted_ids[i]], scores[sorted_ids[i]]))

    get_nn(0)
Example #10
    def __init__(self, withSVM=False):
        [lib, con,
         neu] = cPickle.load(open(os.getcwd() + '/sampleData.pkl', 'rb'))

        self.bias_dict = {}

        for tree in lib:
            sentence = tree.get_words()
            self.bias_dict[sentence] = 1

        for tree in con:
            sentence = tree.get_words()
            self.bias_dict[sentence] = -1

        for tree in neu:
            sentence = tree.get_words()
            self.bias_dict[sentence] = 0

        self.encoder = encoder_manager.EncoderManager()
        self.data_encodings = []
        self.data = self.bias_dict.keys()

        self.blacklist = []

        #f = open('skipthoughts.pkl', 'rb')
        # right now, we're using a unidirectional skip model;
        # we can try the bidirectional model later

        dir_path = os.path.dirname(os.path.realpath(__file__))

        VOCAB_FILE = dir_path + "/../data/vocab.txt"
        EMBEDDING_MATRIX_FILE = dir_path + "/../data/embeddings.npy"
        CHECKPOINT_PATH = dir_path + "/../data/model.ckpt-501424"

        self.encoder.load_model(configuration.model_config(),
                                vocabulary_file=VOCAB_FILE,
                                embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                                checkpoint_path=CHECKPOINT_PATH)

        self.sentiment = SentimentIntensityAnalyzer()

        self.clf = None

        self.withSVM = withSVM

        if withSVM:
            print('using the SVM!')
            f = open('./svm.pkl', 'rb')
            self.clf = cPickle.load(f)
Example #11
def main(unused_argv):
    if not FLAGS.input_file_pattern:
        raise ValueError("--input_file_pattern is required.")
    if not FLAGS.checkpoint_dir:
        raise ValueError("--checkpoint_dir is required.")
    if not FLAGS.eval_dir:
        raise ValueError("--eval_dir is required.")

    # Create the evaluation directory if it doesn't exist.
    eval_dir = FLAGS.eval_dir
    if not tf.gfile.IsDirectory(eval_dir):
        tf.logging.info("Creating eval directory: %s", eval_dir)
        tf.gfile.MakeDirs(eval_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.model_config(
            input_file_pattern=FLAGS.input_file_pattern,
            input_queue_capacity=FLAGS.num_eval_examples,
            shuffle_input_data=False)
        model = skip_thoughts_model.SkipThoughtsModel(model_config,
                                                      mode="eval")
        model.build()

        losses = tf.concat(model.target_cross_entropy_losses, 0)
        weights = tf.concat(model.target_cross_entropy_loss_weights, 0)

        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()

        # Create the summary operation and the summary writer.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(eval_dir)

        g.finalize()

        # Run a new evaluation run every eval_interval_secs.
        while True:
            start = time.time()
            tf.logging.info(
                "Starting evaluation at " +
                time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()))
            run_once(model, losses, weights, saver, summary_writer, summary_op)
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
Example #12
    def __init__(self, modelPath, checkpointPath):
        """Initialize skip though model.

        Arguments:
            modelPath {str} -- the path to model
            checkpointPath {str} -- the filename of mode.ckpt-xxxx
        """
        self.modelPath = modelPath
        self.checkpointPath = os.path.join(modelPath, "..", checkpointPath)
        self.vocabFile = os.path.join(modelPath, "vocab.txt")
        self.embeddingMatrixFile = os.path.join(modelPath, "embeddings.npy")

        self.encoder = encoder_manager.EncoderManager()
        self.encoder.load_model(configuration.model_config(),
                                vocabulary_file=self.vocabFile,
                                embedding_matrix_file=self.embeddingMatrixFile,
                                checkpoint_path=self.checkpointPath)
Example #13
def restore_skipthought(model_dir, model_name, skipthought_embedding,
                        skipthought_vocab):
    """
    :rtype: encoder_manager.EncoderManager()
    :return:
    """
    check_point_path = os.path.join(model_dir, model_name)
    skip_thought_embedding_matrix = os.path.join(model_dir,
                                                 skipthought_embedding)
    skip_thought_vocab = os.path.join(model_dir, skipthought_vocab)

    encoder = encoder_manager.EncoderManager()
    encoder.load_model(configuration.model_config(),
                       vocabulary_file=skip_thought_vocab,
                       embedding_matrix_file=skip_thought_embedding_matrix,
                       checkpoint_path=check_point_path)
    return encoder
Example #14
def main(unused_argv):
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.checkpoint_dir:
    raise ValueError("--checkpoint_dir is required.")
  if not FLAGS.eval_dir:
    raise ValueError("--eval_dir is required.")

  # Create the evaluation directory if it doesn't exist.
  eval_dir = FLAGS.eval_dir
  if not tf.gfile.IsDirectory(eval_dir):
    tf.logging.info("Creating eval directory: %s", eval_dir)
    tf.gfile.MakeDirs(eval_dir)

  g = tf.Graph()
  with g.as_default():
    # Build the model for evaluation.
    model_config = configuration.model_config(
        input_file_pattern=FLAGS.input_file_pattern,
        input_queue_capacity=FLAGS.num_eval_examples,
        shuffle_input_data=False)
    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="eval")
    model.build()

    losses = tf.concat(model.target_cross_entropy_losses, 0)
    weights = tf.concat(model.target_cross_entropy_loss_weights, 0)

    # Create the Saver to restore model Variables.
    saver = tf.train.Saver()

    # Create the summary operation and the summary writer.
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(eval_dir)

    g.finalize()

    # Run a new evaluation run every eval_interval_secs.
    while True:
      start = time.time()
      tf.logging.info("Starting evaluation at " + time.strftime(
          "%Y-%m-%d-%H:%M:%S", time.localtime()))
      run_once(model, losses, weights, saver, summary_writer, summary_op)
      time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
      if time_to_next_eval > 0:
        time.sleep(time_to_next_eval)
Example #15
	def __init__(self, withSVM=False):
		[lib, con, neu] = cPickle.load(open('sampleData.pkl', 'rb'))

		self.bias_dict = {}

		for tree in lib:
			sentence = tree.get_words()
			self.bias_dict[sentence] = 1

		for tree in con:
			sentence = tree.get_words()
			self.bias_dict[sentence] = -1

		for tree in neu:
			sentence = tree.get_words()
			self.bias_dict[sentence] = 0

		self.encoder = encoder_manager.EncoderManager()
		self.data_encodings = []
		self.data = self.bias_dict.keys()

		self.blacklist = []

		#f = open('skipthoughts.pkl', 'rb')
		# right now, we're using a unidirectional skip model;
		# we can try the bidirectional model later
		VOCAB_FILE = "/Users/az/Desktop/projects/modemo/backend/modules/tf/skip_thoughts/pretrained/skip_thoughts_uni_2017_02_02/vocab.txt"
		EMBEDDING_MATRIX_FILE = "/Users/az/Desktop/projects/modemo/backend/modules/tf/skip_thoughts/pretrained/skip_thoughts_uni_2017_02_02/embeddings.npy"
		CHECKPOINT_PATH = "/Users/az/Desktop/projects/modemo/backend/modules/tf/skip_thoughts/pretrained/skip_thoughts_uni_2017_02_02/model.ckpt-501424"

		self.encoder.load_model(configuration.model_config(), vocabulary_file=VOCAB_FILE, embedding_matrix_file=EMBEDDING_MATRIX_FILE, checkpoint_path=CHECKPOINT_PATH)

		self.sentiment = SentimentIntensityAnalyzer()

		self.clf = None

		if withSVM:
			print('using the SVM!')
			f = open('./svm.pkl', 'rb')
			self.clf = cPickle.load(f)
Example #16
def main(unused_argv):
    if not FLAGS.input_file_pattern:
        raise ValueError("--input_file_pattern is required.")
    if not FLAGS.train_dir:
        raise ValueError("--train_dir is required.")

    model_config = configuration.model_config(
        input_file_pattern=FLAGS.input_file_pattern)
    training_config = configuration.training_config()

    tf.logging.info("Building training graph.")
    g = tf.Graph()
    with g.as_default():
        model = skip_thoughts_model.SkipThoughtsModel(model_config,
                                                      mode="train")
        model.build()

        learning_rate = _setup_learning_rate(training_config,
                                             model.global_step)
        optimizer = tf.train.AdamOptimizer(learning_rate)

        train_tensor = tf.contrib.slim.learning.create_train_op(
            total_loss=model.total_loss,
            optimizer=optimizer,
            global_step=model.global_step,
            clip_gradient_norm=training_config.clip_gradient_norm)

        saver = tf.train.Saver()

    tf.contrib.slim.learning.train(
        train_op=train_tensor,
        logdir=FLAGS.train_dir,
        graph=g,
        global_step=model.global_step,
        number_of_steps=training_config.number_of_steps,
        save_summaries_secs=training_config.save_summaries_secs,
        saver=saver,
        save_interval_secs=training_config.save_model_secs)
Example #17
def main(unused_argv):
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  model_config = configuration.model_config(
      input_file_pattern=FLAGS.input_file_pattern)
  training_config = configuration.training_config()

  tf.logging.info("Building training graph.")
  g = tf.Graph()
  with g.as_default():
    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train")
    model.build()

    learning_rate = _setup_learning_rate(training_config, model.global_step)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    train_tensor = tf.contrib.slim.learning.create_train_op(
        total_loss=model.total_loss,
        optimizer=optimizer,
        global_step=model.global_step,
        clip_gradient_norm=training_config.clip_gradient_norm)

    saver = tf.train.Saver()

  tf.contrib.slim.learning.train(
      train_op=train_tensor,
      logdir=FLAGS.train_dir,
      graph=g,
      global_step=model.global_step,
      number_of_steps=training_config.number_of_steps,
      save_summaries_secs=training_config.save_summaries_secs,
      saver=saver,
      save_interval_secs=training_config.save_model_secs)
Example #18
def main(unused_argv):
    if not FLAGS.train_dir:
        raise ValueError("--train_dir is required.")

    #read_vocab(FLAGS.vocab)
    model_config = configuration.model_config()
    training_config = configuration.training_config()
    ################ define discriminator model ################
    disc_model = Discriminator(sequence_length=MAXLEN,
                               num_classes=1,
                               vocab_size=model_config.vocab_size,
                               embedding_size=model_config.word_embedding_dim,
                               filter_sizes=[1, 2, 3, 4, 5, 7, 10],
                               num_filters=[100, 100, 100, 100, 100, 100, 100])

    ################# define training model #################
    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train")
    model.build()
    learning_rate = _setup_learning_rate(training_config, model.global_step)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    variables_to_train = [v for v in tf.trainable_variables()]
    variables_to_restore = [
        v for v in tf.all_variables() if ('discriminator' not in v.name)
    ]

    print(len(variables_to_train))
    train_tensor = tf.contrib.slim.learning.create_train_op(
        total_loss=model.total_loss,
        optimizer=optimizer,
        clip_gradient_norm=training_config.clip_gradient_norm,
        variables_to_train=variables_to_train)

    ######################define target lstm ####################
    #target_lstm = skip_thoughts_model.TargetLSTM(config=model_config)
    #synthesized = True
    target_lstm = None
    synthesized = False
    ################ define testing model ################
    #model_config_test = configuration.model_config()
    #model_test = skip_thoughts_model.SkipThoughtsModel(model_config_test, mode="eval")
    #model_test.build(is_testing=True)

    ################ define savers ################
    reloader = tf.train.Saver(var_list=variables_to_restore)
    reloader_all = tf.train.Saver()
    saver = tf.train.Saver(max_to_keep=1000)
    gpu_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=1.0, allow_growth=True),
                                allow_soft_placement=True,
                                log_device_placement=False)

    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=gpu_config)
    run_metadata = tf.RunMetadata()
    sess.run(init_op,
             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                                   output_partition_graphs=True),
             run_metadata=run_metadata)
    with open("/tmp/meta.txt", 'w') as f:
        f.write(str(run_metadata))

    if FLAGS.reload_model:
        reloader.restore(sess, FLAGS.reload_model)
    if FLAGS.reload_model_all:
        reloader_all.restore(sess, FLAGS.reload_model_all)

    ################ load training data ##############
    train_data_loader = DataLoader(128)
    train_data_loader.load(FLAGS.train_corpus_en, FLAGS.train_corpus_fr)

    total_loss_sup_list = []
    total_loss_rl_list = []
    bleu_list = []
    fake_list, real_list, neglikely_list = [], [], []

    outf = open(os.path.join(FLAGS.train_dir, 'log.txt'), 'a')
    logf = open(os.path.join(FLAGS.train_dir, 'debug_log.txt'), 'w')

    ############### run training and testing #############
    for i in xrange(1000000):
        model_prefix = ""
        if i < FLAGS.pretrain_G_steps:
            model_prefix = "preG_"
            np_global_step, total_loss_sup, total_loss_rl, avg_bleu, avg_fake, avg_real, avg_neglikely = my_train_step(
                sess,
                train_tensor,
                model,
                train_data_loader,
                logf,
                train_sup=True,
                train_rl=False,
                disc_model=disc_model,
                adjustD=False,
                adjustG=True,
                given_num=MAXLEN)

        elif i < FLAGS.pretrain_G_steps + FLAGS.pretrain_D_steps:
            model_prefix = "preD_"
            np_global_step, total_loss_sup, total_loss_rl, avg_bleu, avg_fake, avg_real, avg_neglikely = my_train_step(
                sess,
                train_tensor,
                model,
                train_data_loader,
                logf,
                train_sup=False,
                train_rl=True,
                disc_model=disc_model,
                adjustD=True,
                adjustG=False,
                given_num=0)

        elif FLAGS.mixer_period and FLAGS.mixer_step and FLAGS.mixer_period > 0:
            gn = default_given_num - (
                i - FLAGS.pretrain_G_steps - FLAGS.pretrain_D_steps
            ) // FLAGS.mixer_period * FLAGS.mixer_step
            if gn < 0: gn = 0
            model_prefix = "mixGN" + str(gn) + "_"
            if i % 10 == 0:
                adjustD = FLAGS.adjustD
            else:
                adjustD = False
            if i % 200 == 0:
                print("gn=", gn)
            np_global_step, total_loss_sup, total_loss_rl, avg_bleu, avg_fake, avg_real, avg_neglikely = my_train_step(
                sess, train_tensor, model, train_data_loader, logf,
                train_sup=False, train_rl=True, disc_model=disc_model,
                adjustD=adjustD, adjustG=FLAGS.adjustG, given_num=gn)

        else:
            model_prefix = ""
            np_global_step, total_loss_sup, total_loss_rl, avg_bleu, avg_fake, avg_real, avg_neglikely = my_train_step(
                sess,
                train_tensor,
                model,
                train_data_loader,
                logf,
                train_sup=False,
                train_rl=True,
                disc_model=disc_model,
                adjustD=FLAGS.adjustD,
                adjustG=FLAGS.adjustG)

        total_loss_sup_list.append(total_loss_sup)
        total_loss_rl_list.append(total_loss_rl)
        fake_list.append(avg_fake)
        real_list.append(avg_real)
        bleu_list.append(avg_bleu)
        neglikely_list.append(avg_neglikely)

        if np_global_step % 2000 == 0:
            saver.save(
                sess,
                os.path.join(FLAGS.train_dir,
                             model_prefix + "model-" + str(np_global_step)))
        if np_global_step % 20 == 0:
            # my_test_step(sess, model_test, FLAGS.test_result+'-'+str(np_global_step))
            print(np_global_step, np.mean(total_loss_sup_list),
                  np.mean(total_loss_rl_list))
            print(np.mean(bleu_list), np.mean(fake_list), np.mean(real_list))
            print(np.mean(neglikely_list))
            outf.write(
                str(np_global_step) + " " + str(np.mean(total_loss_sup_list)) +
                " " + str(np.mean(total_loss_rl_list)) + " " +
                str(np.mean(bleu_list)) + " " + str(np.mean(fake_list)) + " " +
                str(np.mean(real_list)) + " " + str(np.mean(neglikely_list)) +
                "\n")
            total_loss_sup_list, total_loss_rl_list, bleu_list, fake_list, real_list, neglikely_list = [],[],[],[],[],[]
Example #19
    # Traditional measures like Levenshtein distance, dynamic time warping, Jaro, etc.

    print(_generate_log("Average Embedding", ae_sims, sim_names))
    print(_generate_log("InferSent", inf_sims, sim_names))
    print(_generate_log("SkipThought", st_sims, sim_names))


if __name__ == '__main__':
    # Load in InferSent
    infersent = torch.load(MODEL_PATH)  # rely on "models.py" as well
    infersent.set_glove_path(GLOVE_PATH)

    # Load in SkipThought
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.allow_growth = True

    with tf.Graph().as_default(), tf.Session(config=config_gpu) as session:
        skipthought = encoder_manager.EncoderManager()

        skipthought.load_model(
            configuration.model_config(bidirectional_encoder=True),
            vocabulary_file=VOCAB_FILE,
            embedding_matrix_file=EMBEDDING_MATRIX_FILE,
            checkpoint_path=CHECKPOINT_PATH)

    # Load in average embedding
    avg_emb = AverageEmbedder(word_emb_dim=300)
    avg_emb.set_glove_path(GLOVE_PATH)

    IPython.embed()
Example #20
import numpy as np
import pandas as pd
from skip_thoughts import configuration
from skip_thoughts import encoder_manager
from sklearn.feature_extraction.text import TfidfVectorizer

VOCAB_FILE = ".\\skip_thoughts_bi_2017_02_16\\vocab.txt"
EMBEDDING_MATRIX_FILE = ".\\skip_thoughts_bi_2017_02_16\\embeddings.npy"
CHECKPOINT_PATH = ".\\skip_thoughts_bi_2017_02_16\\model.ckpt-500008"

encoder = encoder_manager.EncoderManager()
encoder.load_model(configuration.model_config(bidirectional_encoder=True),
                   vocabulary_file=VOCAB_FILE,
                   embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                   checkpoint_path=CHECKPOINT_PATH)

def neural_features(dataset_loc):
    english_dataset = pd.read_csv(dataset_loc)
    headline = english_dataset['headline']
    body = english_dataset['content']
    labels = [int(x) for x in english_dataset['label']]

    labels_done = []
    flag = True
    body_encodings = np.zeros((len(body), 2400))
    j = 0
    for i in range(len(body)):
        flag = True
        try:
            current_body_encoding = encoder.encode(body[i:i+1])
        except:
            flag = False
Example #21
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


VOCAB_FILE = "skip_thoughts_model/pretrained/skip_thoughts_uni_2017_02_02/vocab.txt"
EMBEDDING_MATRIX_FILE = "skip_thoughts_model/pretrained/skip_thoughts_uni_2017_02_02/embeddings.npy"
CHECKPOINT_PATH = "skip_thoughts_model/pretrained/skip_thoughts_uni_2017_02_02/model.ckpt-501424"


print ("loading model ...")
encoder = encoder_manager.EncoderManager()
encoder.load_model(configuration.model_config(),
                   vocabulary_file=VOCAB_FILE,
                   embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                   checkpoint_path=CHECKPOINT_PATH)


data_name, name_to_annotations, _ = pickle.load(open("data/train_info.pkl", "rb"))


tfrecords_filename = 'train_image.tfrecords'

writer = tf.python_io.TFRecordWriter(tfrecords_filename)


img_dir = 'data/train2014/'
Example #22
skipthought_checkpoint_path = 'checkpoints/skipthought/model.ckpt-501424'


# Loading Captions for COCO
#cap_path = 'coco/annotations/captions_train2014.json'
#cat_path = 'coco/annotations/instances_train2014.json'
#print('Loading Captions')
#coco_cap = COCO(cap_path)
#captions = coco_cap.loadAnns(coco_cap.getAnnIds())
#words = [caption['caption'] for caption in captions]

# CUB-2011 Captions generated by the model - Show & Tell [Vinyals et. al 2016]
cap_path = 'bird_caps_big.npy'

encoder = encoder_manager.EncoderManager()
encoder.load_model(configuration.model_config(), vocabulary_file=skipthought_vocab_path,
                   embedding_matrix_file=skipthought_embedding_matrix_path,
                   checkpoint_path=skipthought_checkpoint_path)

compressor = PCA(n_components=128)
print('Loading Captions')
bird_caps = np.load(cap_path)
print('loaded captions -', len(bird_caps))
words = [bird_cap[0] for bird_cap in bird_caps]
##words = [key for key in dictionary.keys()]
encodings = encoder.encode(words, verbose=True)
print('Performing PCA...')
compressor.fit(np.array(encodings))
encodings = compressor.transform(encodings)
mapping = []
Example #23
 def setUp(self):
     super(SkipThoughtsModelTest, self).setUp()
     self._model_config = configuration.model_config()
Example #24
 def setUp(self):
   super(SkipThoughtsModelTest, self).setUp()
   self._model_config = configuration.model_config()
Example #25
def main(unused_argv):
    if not FLAGS.input_file_pattern:
        raise ValueError("--input_file_pattern is required.")
    if not FLAGS.run_dir:
        raise ValueError("--run_dir is required.")
    if not FLAGS.decoder:
        raise ValueError("--decoder is required.")

    if not FLAGS.train_dir:
        train_dir = os.path.join(FLAGS.run_dir,
                                 'run_{t}'.format(t=time.time()))
        tf.logging.info(
            "No specified --train_dir. Creating {d}.".format(d=train_dir))
        os.makedirs(train_dir)

        write_config(train_dir=train_dir, flags=FLAGS)

    else:
        tf.logging.info("Specified --train_dir {d}; Not autocreating.".format(
            d=FLAGS.train_dir))
        train_dir = FLAGS.train_dir

    decoder_config = experiments.get_decoder_config(flags=FLAGS)
    model_config = configuration.model_config(
        input_file_pattern=FLAGS.input_file_pattern,
        vocab_size=FLAGS.vocab_size,
        batch_size=FLAGS.batch_size,
        word_embedding_dim=FLAGS.word_dim,
        pretrained_word_emb_file=FLAGS.pretrained_word_emb_file,
        word_emb_trainable=FLAGS.word_emb_trainable,
        encoder_dim=FLAGS.encoder_dim,
        skipgram_encoder=FLAGS.skipgram_encoder,
        sequence_decoder_pre=decoder_config.sequence_decoder_pre,
        sequence_decoder_cur=decoder_config.sequence_decoder_cur,
        sequence_decoder_post=decoder_config.sequence_decoder_post,
        skipgram_decoder_pre=decoder_config.skipgram_decoder_pre,
        skipgram_decoder_cur=decoder_config.skipgram_decoder_cur,
        skipgram_decoder_post=decoder_config.skipgram_decoder_post,
        share_weights_logits=FLAGS.share_weights_logits,
        normalise_decoder_losses=FLAGS.normalise_decoder_losses,
        skipgram_prefactor=FLAGS.skipgram_prefactor,
        sequence_prefactor=FLAGS.sequence_prefactor)
    training_config = configuration.training_config(
        number_of_steps=FLAGS.number_of_steps)

    tf.logging.info("Building training graph.")
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(1234)
        model = skip_thoughts_model.SkipThoughtsModel(model_config,
                                                      mode="train")
        model.build()

        learning_rate = _setup_learning_rate(training_config,
                                             model.global_step)
        optimizer = tf.train.AdamOptimizer(learning_rate)

        train_tensor = tf.contrib.slim.learning.create_train_op(
            total_loss=model.total_loss,
            optimizer=optimizer,
            global_step=model.global_step,
            clip_gradient_norm=training_config.clip_gradient_norm,
            summarize_gradients=True,
            check_numerics=True)

        saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_fraction)

    tf.contrib.slim.learning.train(
        train_op=train_tensor,
        logdir=train_dir,
        graph=g,
        global_step=model.global_step,
        number_of_steps=training_config.number_of_steps,
        session_config=tf.ConfigProto(gpu_options=gpu_options),
        save_summaries_secs=training_config.save_summaries_secs,
        saver=saver,
        save_interval_secs=training_config.save_model_secs)
Example #26
def main(unused_argv):
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  model_config = configuration.model_config(
      input_file_pattern=FLAGS.input_file_pattern, bidirectional_encoder=True)
  training_config = configuration.training_config()

  tf.logging.info("Building training graph.")
  g = tf.Graph()
  with g.as_default():
    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train")
    model.build()

    encoder_variables = [
        v for v in tf.global_variables()
        if v.name.startswith("encoder") and "Adam" not in v.name
    ]
    embedding_variables = [
        v for v in tf.global_variables()
        if v.name.startswith("word_embedding") and "Adam" not in v.name
    ]
    print([v.name for v in (encoder_variables + embedding_variables)])

    learning_rate = _setup_learning_rate(training_config, model.global_step)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    encoder_mult = 0.1
    embedding_mult = 0.01
    multiply = dict([(v, encoder_mult) for v in encoder_variables] +
                    [(v, embedding_mult) for v in embedding_variables])

    train_tensor = tf.contrib.slim.learning.create_train_op(
        total_loss=model.total_loss,
        optimizer=optimizer,
        gradient_multipliers=multiply,
        global_step=model.global_step,
        clip_gradient_norm=training_config.clip_gradient_norm)

    saver = tf.train.Saver()
    model_path = tf.train.latest_checkpoint(FLAGS.train_dir)

    pretrain_saver = tf.train.Saver(encoder_variables + embedding_variables)

  print(model_path)
  if model_path:
    def restore_fn(sess):
      tf.logging.info("Restoring SA&T variables from checkpoint file")
      saver.restore(sess, model_path)
  else:
    def restore_fn(sess):
      tf.logging.info("Restoring SA&T variables from pretrained model")
      #saver.restore(sess, "/home/ubuntu/code/A_skip_thoughts_2/skip_thoughts/model/backup/run1/model.ckpt-2111")
      pretrain_saver.restore(sess, "/home/ubuntu/code/pretrained/bi/model.ckpt-500008")

  tf.contrib.slim.learning.train(
      train_op=train_tensor,
      logdir=FLAGS.train_dir,
      graph=g,
      global_step=model.global_step,
      number_of_steps=training_config.number_of_steps,
      save_summaries_secs=training_config.save_summaries_secs,
      saver=saver,
      save_interval_secs=training_config.save_model_secs,
      init_fn = restore_fn)
Example #27
FLAGS_PICKLE_PATH = os.path.join(FLAGS.model_dir, "flags.pkl")

# Load the configuration used to make the model
with open(FLAGS_PICKLE_PATH, 'rb') as f:
    model_flags = cPickle.load(f)

decoder_config = experiments.get_decoder_config(flags=model_flags)
model_config = configuration.model_config(
    input_file_pattern=model_flags.input_file_pattern,
    vocab_size=model_flags.vocab_size,
    batch_size=model_flags.batch_size,
    word_embedding_dim=model_flags.word_dim,
    encoder_dim=model_flags.encoder_dim,
    skipgram_encoder=model_flags.skipgram_encoder,
    sequence_decoder_pre=decoder_config.sequence_decoder_pre,
    sequence_decoder_cur=decoder_config.sequence_decoder_cur,
    sequence_decoder_post=decoder_config.sequence_decoder_post,
    skipgram_decoder_pre=decoder_config.skipgram_decoder_pre,
    skipgram_decoder_cur=decoder_config.skipgram_decoder_cur,
    skipgram_decoder_post=decoder_config.skipgram_decoder_post,
    share_weights_logits=model_flags.share_weights_logits,
    normalise_decoder_losses=model_flags.normalise_decoder_losses,
    skipgram_prefactor=model_flags.skipgram_prefactor,
    sequence_prefactor=model_flags.sequence_prefactor)

# Set up the encoder. Here we are using a single unidirectional model.
# To use a bidirectional model as well, call load_model() again with
# configuration.model_config(bidirectional_encoder=True) and paths to the
# bidirectional model's files. The encoder will use the concatenation of
# all loaded models.
encoder = encoder_manager.EncoderManager()
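The excerpt stops right after constructing the EncoderManager. A hedged sketch of
how the load step would typically continue, assuming this fork keeps the standard
load_model() signature; the file names under FLAGS.model_dir are hypothetical
placeholders, not values from the original script:

encoder.load_model(model_config,
                   vocabulary_file=os.path.join(FLAGS.model_dir, "vocab.txt"),  # hypothetical
                   embedding_matrix_file=os.path.join(FLAGS.model_dir, "embeddings.npy"),  # hypothetical
                   checkpoint_path=tf.train.latest_checkpoint(FLAGS.model_dir))
encodings = encoder.encode(["an example sentence ."])
encoder.close()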
Example #28
def main(unused_argv):
    if not FLAGS.input_file_pattern:
        raise ValueError("--input_file_pattern is required.")
    if not FLAGS.train_dir:
        raise ValueError("--train_dir is required.")

    model_config = configuration.model_config(
        input_file_pattern=FLAGS.input_file_pattern)
    training_config = configuration.training_config()

    tf.logging.info("Building training graph.")
    g = tf.Graph()
    with g.as_default():
        grads_tower = []
        for dev_ind in range(4):
            with tf.device('/gpu:%d' % dev_ind):
                model = skip_thoughts_model.SkipThoughtsModel(model_config,
                                                              mode="train")
                model.build()

                learning_rate = _setup_learning_rate(training_config,
                                                     model.global_step)
                optimizer = tf.train.AdamOptimizer(learning_rate)

                total_loss = model.total_loss
                # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
                update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))

                # Make sure update_ops are computed before total_loss.
                if update_ops:
                    with ops.control_dependencies(update_ops):
                        barrier = control_flow_ops.no_op(name='update_barrier')
                    total_loss = control_flow_ops.with_dependencies([barrier],
                                                                    total_loss)

                variables_to_train = tf_variables.trainable_variables()

                assert variables_to_train

                gate_gradients = tf_optimizer.Optimizer.GATE_OP
                # Create the gradients. Note that apply_gradients adds the gradient
                # computation to the current graph.
                grads = optimizer.compute_gradients(
                    total_loss,
                    variables_to_train,
                    gate_gradients=gate_gradients,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False)

                grads = tf.contrib.slim.learning.clip_gradient_norms(
                    grads, training_config.clip_gradient_norm)

                grads_tower.append(grads)

        avg_grads = average_gradients.average_gradients(grads_tower)
        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(avg_grads,
                                                 global_step=model.global_step)

        with ops.name_scope('train_op'):
            # Make sure total_loss is valid.
            total_loss = array_ops.check_numerics(total_loss,
                                                  'LossTensor is inf or nan')

            # Ensure the train_tensor computes grad_updates.
            train_op = control_flow_ops.with_dependencies([grad_updates],
                                                          total_loss)

        # Add the operation used for training to the 'train_op' collection
        train_ops = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        if train_op not in train_ops:
            train_ops.append(train_op)

        saver = tf.train.Saver()

    tf.contrib.slim.learning.train(
        train_op=train_op,
        logdir=FLAGS.train_dir,
        graph=g,
        global_step=model.global_step,
        number_of_steps=training_config.number_of_steps,
        save_summaries_secs=training_config.save_summaries_secs,
        saver=saver,
        save_interval_secs=training_config.save_model_secs)