def data_construct(input_file, batch_size, vocab, max_length=100, config=None):
    """Build the test-set iterator for ``input_file``.

    The file is read through ``data_helpers.load_test_data`` and the
    resulting features are wrapped in an ``mx.io.NDArrayIter``.  The labels
    returned by the loader are attached to the iterator only when ``config``
    is truthy; otherwise the iterator is built without labels.

    Returns:
        A ``(test_iter, contents, labels)`` tuple, where ``contents`` and
        ``labels`` are passed through unchanged from the loader.
    """
    logger.info('Loading data...')
    loaded = data_helpers.load_test_data(input_file, max_length, vocab, config)
    x_test, contents, labels, y_test = loaded

    iterator_labels = y_test if config else None
    test_iter = mx.io.NDArrayIter(x_test, iterator_labels, batch_size)
    return test_iter, contents, labels
def main():
    """Run inference on ``data/test.csv`` and save a two-column submission.

    The predictions are printed, then written to ``submission_softmax.csv``
    next to a 1-based row id under the header ``ImageId,Label``.
    """
    samples = data_helpers.load_test_data('data/test.csv')
    predicted = inference(samples)
    print(predicted)

    # Row ids are 1-based and paired column-wise with the predictions.
    row_ids = range(1, len(samples) + 1)
    table = np.c_[row_ids, predicted]
    np.savetxt('submission_softmax.csv',
               table,
               delimiter=',',
               header='ImageId,Label',
               comments='',
               fmt='%d')
def main():
    """Score a restored QALSTM checkpoint on the test pool and print MAP/MRR.

    Loads the embeddings, vocabulary, answers and test questions through
    ``data_helpers``, rebuilds the ``QALSTM`` graph, restores
    ``checkpoints/model.ckpt`` and collects ``model.scores`` batch by batch.
    The scores are then handed to ``eval_map_mrr``.
    """
    trained_model = "checkpoints/model.ckpt"
    embedding_size = 100  # Word embedding dimension
    batch_size = 128  # Batch data size
    sequence_length = 300  # Sentence length
    rnn_size = 50  # Number of hidden layer neurons
    attention_matrix_size = 100
    margin = 0.1
    gpu_mem_usage = 0.75
    gpu_device = "/gpu:0"
    dataset_dir = 'D:\\DataMining\\Datasets\\insuranceQA\\V1\\'

    embeddings, word2idx = data_helpers.load_embedding('vectors.nobin')
    voc = data_helpers.load_vocab(dataset_dir + 'vocabulary')
    all_answers = data_helpers.load_answers(
        dataset_dir + 'answers.label.token_idx', voc)
    # The last argument used to be a literal 300; it is tied to
    # `sequence_length` (same value) so the loader and the model cannot
    # drift apart.  NOTE(review): assumes that argument is the padded
    # sequence length -- confirm against data_helpers.load_test_data.
    questions, answers, labels, qids, aids = data_helpers.load_test_data(
        dataset_dir + 'question.test1.label.token_idx.pool',
        all_answers, voc, word2idx, sequence_length)

    with tf.Graph().as_default(), tf.device(gpu_device):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_usage)
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)
        model = QALSTM(batch_size, sequence_length, embeddings,
                       embedding_size, rnn_size, margin,
                       attention_matrix_size)
        # Using the session itself as the context manager both installs it
        # as the default session and closes it on exit; `as_default()` alone
        # never released it.
        with tf.Session(config=session_conf) as sess:
            saver = tf.train.Saver()
            print("Start loading the model")
            saver.restore(sess, trained_model)
            print("The model is loaded")

            scores = []
            for question, answer in data_helpers.test_batch_iter(
                    questions, answers, batch_size):
                feed_dict = {model.qtest: question, model.atest: answer}
                batch_scores = sess.run(model.scores, feed_dict)
                scores.extend(batch_scores.tolist())

            MAP, MRR = eval_map_mrr(qids, aids, scores, labels)
            print('MAP %2.3f\tMRR %2.3f' % (MAP, MRR))
FLAGS = tf.flags.FLAGS
# NOTE(review): `_parse_flags` and `__flags` are private members of the flags
# object -- confirm they exist in the TensorFlow version this script targets.
FLAGS._parse_flags()
# Echo every parsed flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file)
x_eval = data_helpers.load_test_data(FLAGS.test_data_file)

# Pad sentences
# The first call runs over training + evaluation text together; the
# max_length it returns is then passed back in when padding the training
# text on its own.
sentences_padded_all, max_length = data_helpers.pad_sentences(x_text + x_eval)
sentences_padded, max_length = data_helpers.pad_sentences(x_text, max_length)

# Build vocabulary
# The vocabulary is built from the combined (train + eval) padded sentences,
# while the model inputs are built from the training sentences only.
vocabulary, vocabulary_inv = data_helpers.build_vocab(sentences_padded_all)
x, y = data_helpers.build_input_data(sentences_padded, y, vocabulary)

# Randomly shuffle data
# Fixed seed so the permutation is reproducible between runs.
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
seq_len = len(tmp_list) if seq_len > max_len: seq_len = max_len real_len.append(seq_len) return real_len def load_train_params(train_dir): sorted_label = json.loads(open(train_dir + '/sorted_label.json').read()) train_params = json.loads(open(train_dir + '/train_params.json').read()) return sorted_label,train_params # CHANGE THIS: Load data. Load your own data here if FLAGS.eval_train: train_dir = os.path.join(FLAGS.checkpoint_dir, "..", "trained_results") sorted_label, train_params = load_train_params(train_dir) x_raw, y_test = data_helpers.load_test_data(FLAGS.test_data_file, sorted_label) y_test = np.argmax(y_test, axis=1) else: x_raw = ["a masterpiece four years in the making", "everything is off."] y_test = [1, 0] # Map data into vocabulary vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab") vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path) x_real_len_test = np.array(get_real_len(x_raw, train_params['max_document_length'])) x_test = np.array(list(vocab_processor.transform(x_raw))) print("\nEvaluating...\n") # Evaluation # ================================================== checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
def eval():
    """Evaluate the latest checkpoint on the test corpus.

    Steps:
      1. Restore the source/target vocabulary processors stored next to
         ``FLAGS.checkpoint_dir`` and encode the test sentences.
      2. Restore the newest checkpoint and decode every batch one target
         position at a time, feeding the predictions made so far back in.
      3. Print token accuracy (positions where the target id is 0 are
         ignored) and the smoothed corpus BLEU score.
      4. Write source / target / prediction triples to ``FLAGS.output_dir``.
    """
    # Map data into vocabulary
    source_vocab_path = os.path.join(FLAGS.checkpoint_dir, "..",
                                     "source_vocab")
    source_vocab_processor = (
        tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
            source_vocab_path))
    # The length of a transformed dummy document is used as the sentence
    # length for this vocabulary processor.
    source_max_sentence_length = len(
        list(source_vocab_processor.transform(['test']))[0])

    target_vocab_path = os.path.join(FLAGS.checkpoint_dir, "..",
                                     "target_vocab")
    target_vocab_processor = (
        tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
            target_vocab_path))
    target_max_sentence_length = len(
        list(target_vocab_processor.transform(['test']))[0])

    with tf.device('/cpu:0'):
        source_sent, target_sent = data_helpers.load_test_data(
            FLAGS.test_source_dir, FLAGS.test_target_dir,
            source_max_sentence_length, target_max_sentence_length)

    source_eval = np.array(list(source_vocab_processor.transform(source_sent)))
    target_eval = np.array(list(target_vocab_processor.transform(target_sent)))

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # The session is its own context manager so it is closed once
        # decoding is finished.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            source = graph.get_operation_by_name("encoder_x").outputs[0]
            target = graph.get_operation_by_name("decoder_y").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(source_eval),
                                              FLAGS.batch_size,
                                              1,
                                              shuffle=False)

            # Collect the predictions here
            all_predictions = np.empty([0, target_max_sentence_length], int)
            for batch in batches:
                # Auto-regressive inference.  The buffer is indexed by target
                # position and concatenated into an array with
                # `target_max_sentence_length` columns, so it is sized by the
                # target length rather than copied from the source batch
                # shape (the two only coincide when both lengths are equal).
                batch_predictions = np.ones(
                    (len(batch), target_max_sentence_length), dtype=int)
                for j in range(target_max_sentence_length):
                    pred = sess.run(predictions,
                                    feed_dict={
                                        source: batch,
                                        target: batch_predictions
                                    })
                    batch_predictions[:, j] = pred[:, j]
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])

    # Accuracy over positions whose target id is non-zero.
    is_target = np.not_equal(target_eval, 0).astype(float)
    matches = np.equal(all_predictions, target_eval).astype(float)
    accuracy = np.sum(matches * is_target) / np.sum(is_target)
    print("Total number of test examples: {}\n".format(len(target_eval)))
    print("Accuracy: {:g}".format(accuracy))

    prediction_sent = [
        " ".join(target_vocab_processor.vocabulary_.reverse(idx)
                 for idx in idx_seq)
        for idx_seq in all_predictions
    ]

    # BLEU Score
    # corpus_bleu(list_of_references, hypotheses): the gold target sentences
    # are the references and the model outputs are the hypotheses.  They were
    # previously passed the other way round.
    list_of_references = []
    hypotheses = []
    for predicted, reference in zip(prediction_sent, target_sent):
        predicted_tokens = predicted.split()
        reference_tokens = reference.split()
        # Only sentence pairs with more than three tokens on both sides
        # take part in the score.
        if len(predicted_tokens) > 3 and len(reference_tokens) > 3:
            list_of_references.append([reference_tokens])
            hypotheses.append(predicted_tokens)
    chencherry = SmoothingFunction()
    score = corpus_bleu(list_of_references,
                        hypotheses,
                        smoothing_function=chencherry.method4)
    print("BLEU Score : {:g}\n".format(score * 100))

    # Samples of Translation Result
    if not os.path.exists('results'):
        os.mkdir('results')
    with open(FLAGS.output_dir, 'w') as f:
        for idx, (s, t, p) in enumerate(
                zip(source_sent, target_sent, prediction_sent)):
            f.write("Sample #%d\n" % idx)
            f.write("Source : %s\n" % s)
            f.write("Target : %s\n" % t)
            f.write("Predict : %s\n\n" % p)
def predict(data, params_path=FLAGS.checkpoint_dir):
    """Attach (time, attribute, value) results to every item of ``data``.

    Two saved classifiers are run in turn: ``model_ta_final`` on the
    time/attribute candidates and ``model_av_final`` on the attribute/value
    candidates.  Candidates predicted as class 1 are grouped per sentence id
    and combined by ``Two2Three``.

    Args:
        data: Indexable collection of dict-like items; it is handed to
            ``data_helpers.load_test_data`` and each item receives a
            ``'results'`` entry in place.
        params_path: Directory holding both checkpoints.

    Returns:
        The same ``data`` object, mutated in place.
    """
    num_sent = len(data)
    mask = np.ones(shape=[FLAGS.sequence_length]).nonzero()

    with tf.device('/cpu:0'):
        (x_text_ta, x_position_ta, x_id_ta,
         x_text_av, x_position_av, x_id_av) = data_helpers.load_test_data(data)

    checkpoint_file = params_path + '/'

    # find Time & Attribute tuples
    id_ta, time_attr = _classify_candidates(
        checkpoint_file + 'model_ta_final',
        x_text_ta, x_position_ta, x_id_ta, mask)

    # find Attribute & Value tuples
    id_av, attr_value = _classify_candidates(
        checkpoint_file + 'model_av_final',
        x_text_av, x_position_av, x_id_av, mask)

    # combining (time, attribute, value) tuples
    # All results are computed before `data` is touched, so a failure in
    # Two2Three leaves `data` unmodified.
    results = []
    for sent_idx in range(num_sent):
        two_tuple = {
            'time_attr': time_attr[np.where(id_ta == sent_idx)],
            'attr_val': attr_value[np.where(id_av == sent_idx)],
        }
        results.append(Two2Three(two_tuple))

    for sent_idx, result in enumerate(results):
        data[sent_idx]['results'] = result.tolist()
    return data


def _classify_candidates(checkpoint_path, x_text, x_position, x_id, mask):
    """Run one saved classifier and keep the candidates it labels as 1.

    Restores ``checkpoint_path`` into a fresh graph, feeds the candidates in
    batches with every dropout keep-probability set to 1.0, and returns
    ``(ids, rows)``: the sentence ids of the positive candidates and their
    position rows with the class-1 probability appended as a last column.
    """
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        # The session is closed when the block exits, so the two models do
        # not stay open at the same time.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_path))
            saver.restore(sess, checkpoint_path)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            input_position = graph.get_operation_by_name(
                "input_position").outputs[0]
            input_mask = graph.get_operation_by_name(
                "position_mask").outputs[0]
            emb_dropout_keep_prob = graph.get_operation_by_name(
                "emb_dropout_keep_prob").outputs[0]
            rnn_dropout_keep_prob = graph.get_operation_by_name(
                "rnn_dropout_keep_prob").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]
            probabilities = graph.get_operation_by_name(
                "output/probabilities").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_text, x_position, None,
                                              FLAGS.batch_size, 1,
                                              shuffle=False)

            # Collect the predictions here
            preds = []
            probs = []
            for x_text_batch, x_position_batch in batches:
                pred, prob = sess.run(
                    [predictions, probabilities],
                    {input_text: x_text_batch,
                     input_position: x_position_batch,
                     input_mask: mask,
                     emb_dropout_keep_prob: 1.0,
                     rnn_dropout_keep_prob: 1.0,
                     dropout_keep_prob: 1.0})
                preds.append(pred)
                # Column 1 of the probability output is kept as the score.
                probs.append(prob[:, 1])

    preds = np.concatenate(preds)
    probs = np.concatenate(probs)
    rows = np.concatenate((x_position, probs[:, None]), axis=1)
    positive = np.where(preds == 1)
    return x_id[positive].copy(), rows[positive].copy()
def test_cnn(test_examples, test_labels, checkpoint_file, vocabulary):
    """Run a saved CNN checkpoint over a test set and report accuracy.

    Args:
        test_examples: Passed through to ``data_helpers.load_test_data``.
        test_labels: Passed through to ``data_helpers.load_test_data``.
        checkpoint_file: Checkpoint prefix; ``<checkpoint_file>.meta`` is
            imported and the variables are restored from it.
        vocabulary: Passed through to ``data_helpers.load_test_data``.

    Returns:
        The predictions for every test example, concatenated over all
        batches.
    """
    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Load data. Load your own data here
    print("Loading data...")
    x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_test_data(
        test_examples, test_labels, vocabulary)
    # Collapse the per-class label rows to class indices.
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))

    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        # The session is its own context manager so it is closed when
        # inference is done; repeated calls no longer accumulate sessions.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test,
                                              FLAGS.batch_size,
                                              1,
                                              shuffle=False)

            # Collect the predictions here.  Starting from an empty list and
            # concatenating per batch is kept on purpose: it determines the
            # dtype of the returned array, which callers may rely on.
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {
                    input_x: x_test_batch,
                    dropout_keep_prob: 1.0
                })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])

    # Print accuracy
    correct_predictions = float(np.sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))
    return all_predictions
initializer(name, arg_dict[name]) param_blocks.append( (i, arg_dict[name], args_grad[name], name) ) data = cnn_exec.arg_dict['data'] label = cnn_exec.arg_dict['softmax_label'] return CNNModel(cnn_exec=cnn_exec, symbol=cnn, data=data, label=label, param_blocks=param_blocks) pkl_file = open('vocab.pkl', 'rb') vocab = pickle.load(pkl_file) pkl_file.close() sentence=data_helpers.load_test_data() sentences_padded = data_helpers.pad_sentences(sentence) sentence_test=[] for sent in sentences_padded: l=[] for word in sent: if word in vocab: l.append(vocab[word]) else: l.append(0) sentence_test.append(l) sentence_test=np.array(sentence_test) vocab_size = len(vocab) num_embed = 50 batch_size = 100
tf.app.flags.DEFINE_boolean('log_device_placement', False, 'Demonstrate which variables are on what device.') # Store all elemnts in FLAG structure! FLAGS = tf.app.flags.FLAGS if not os.path.isabs(FLAGS.train_dir): raise ValueError('You must assign absolute path for --train_dir') if not os.path.isabs(FLAGS.checkpoint_dir): raise ValueError('You must assign absolute path for --checkpoint_dir') maybe_download_and_extract() images_train, cls_train, labels_train = load_training_data() images_test, cls_test, labels_test = load_test_data() tensors_key = ['images_train', 'labels_train', 'images_test', 'labels_test'] tensors = [images_train, labels_train, images_test, labels_test] data = dict(zip(tensors_key, tensors)) num_train_samples = images_train.shape[0] height = 32 width = 32 num_channels = 3 print(load_class_names()) graph = tf.Graph() with graph.as_default(): global_step = tf.Variable(0, name="global_step", trainable=False)
"Checkpoint directory from training run")
# (The line above closes a flag definition that starts outside this view.)

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
# NOTE(review): `_parse_flags` and `__flags` are private members of the flags
# object -- confirm they exist in the TensorFlow version this script targets.
FLAGS._parse_flags()
# Echo every parsed flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# NOTE(review): the input path is hard-coded to one user's machine; consider
# exposing it as a flag.
x_raw = data_helpers.load_test_data(
    '/Users/Winnerineast/Documents/haodaifu/NewData/tobetrained.csv')

# Map data into vocabulary
vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
vocabulary, vocabulary_inv, max_length = data_helpers.restore_vocabulary(
    vocab_path)
# Pad with the max_length restored alongside the vocabulary; the length
# returned by this call is not used afterwards in the visible code.
sentences_padded, tmp_length = data_helpers.pad_sentences(x_raw, max_length)
# No labels are supplied (None), so y_test holds whatever build_input_data
# returns for a missing label argument.
x_test, y_test = data_helpers.build_input_data(sentences_padded, None,
                                               vocabulary)

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()