def cv_evaluate():
    path = 'pretrained/cv/VisionModel'
    reader = InstanceReader("data/cv/testing.csv")
    batch_size = 64
    epochs = reader.length // batch_size
    sess = tf.Session()
    model = VisionModel(num_classes=200)
    model.restore(sess, path)
    total_acc = 0.0
    total_cnt = 0.0
    for step, pi in etc.range(epochs):
        # Get a batch of testing instances.
        _, batch_images, batch_labels = reader.read(lines=batch_size)
        # Calculate batch accuracy and loss.
        acc, loss = model.evaluate(sess, batch_images, batch_labels)
        total_acc += acc
        total_cnt += 1.0
        print("Iter " + str(1 + step) + " / " + str(epochs) +
              ", Validation Accuracy= " +
              "{:.5f}".format(total_acc / total_cnt) +
              ", Time Remaining= " +
              etc.format_seconds(pi.time_remaining()), file=sys.stderr)
def cv_extract_features():
    path = 'pretrained/cv/VisionModel'
    reader = InstanceReader("data/cv/all.csv")
    batch_size = 1
    epochs = reader.length // batch_size
    sess = tf.Session()
    model = VisionModel(num_classes=200)
    model.restore(sess, path)
    cache = FeatureCache()
    for step, pi in etc.range(epochs):
        # Get a batch of instances (one image per batch).
        batch_ids, batch_images, _ = reader.read(lines=batch_size)
        # Retrieve the first (and only) image path from the batch.
        image_path = batch_images[0]
        # Predict the label, extract the features, and cache them by instance id.
        label, features, confidence = model.predict(sess, image_path)
        cache.set(batch_ids[0], label[0], features)
        print("Iter " + str(1 + step) + " / " + str(epochs) +
              ", Time Remaining= " +
              etc.format_seconds(pi.time_remaining()), file=sys.stderr)
    cache.save("data/cv/features.csv")
def cv_train():
    path = 'data/inception_resnet_v2_2016_08_30.ckpt'
    batch_size = 128
    reader = InstanceReader("data/cv/training.csv")
    epochs = reader.length // batch_size * 10

    def get_batch(batch_size):
        return reader.read(lines=batch_size)

    sess = tf.Session()
    model = VisionModel(num_classes=200)
    model.restore(sess, path, last_layer=False)
    for step, pi in etc.range(epochs):
        # Get a batch of training instances.
        batch_ids, batch_images, batch_labels = get_batch(batch_size)
        model.train(sess, batch_images, batch_labels)
        # Calculate batch accuracy and loss.
        acc, loss = model.evaluate(sess, batch_images, batch_labels)
        print("Iter " + str(1 + step) + " / " + str(epochs) +
              ", Minibatch Loss= " +
              "{:.6f}".format(loss) + ", Training Accuracy= " +
              "{:.5f}".format(acc) + ", Time Remaining= " +
              etc.format_seconds(pi.time_remaining()), file=sys.stderr)
        model.save(sess, 1 + step)
    print("Optimization Finished!", file=sys.stderr)
def sc_evaluate():
    max_length = 30
    num_classes = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))
    reader = SentenceReader("data/sc/testing.csv", num_classes,
                            embedding_size=embedding_size)
    batch_size = 128
    epochs = len(reader) // batch_size
    # Network Parameters
    num_hidden = 512  # number of units in the hidden layer
    tf.reset_default_graph()
    model = SentenceClassifier(max_length, embedding_size, num_hidden,
                               num_classes)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        model.restore(sess, "pretrained/sc/SentenceClassifier")
        total_acc = 0.0
        total_cnt = 0.0
        for step, pi in etc.range(epochs):
            # Get a batch of testing instances.
            batch_x, batch_y, batch_len = reader.read(lines=batch_size)
            # Calculate batch accuracy and loss.
            acc, loss = model.evaluate(batch_x, batch_y, batch_len, sess)
            total_acc += acc
            total_cnt += 1.0
            print("Iter " + str(1 + step) + " / " + str(epochs) +
                  ", Validation Accuracy= " +
                  "{:.5f}".format(total_acc / total_cnt) +
                  ", Time Remaining= " +
                  etc.format_seconds(pi.time_remaining()), file=sys.stderr)
def sc_train():
    max_length = 30
    num_classes = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))
    reader = SentenceReader("data/sc/training.csv", num_classes,
                            embedding_size=embedding_size)
    batch_size = 128
    epochs = 2000
    # Network Parameters
    num_hidden = 512  # number of units in the hidden layer
    model = SentenceClassifier(max_length, embedding_size, num_hidden,
                               num_classes)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for step, pi in etc.range(epochs):
            # Get a batch of training instances.
            batch_x, batch_y, batch_len = reader.read(lines=batch_size)
            # Run optimization op (backprop).
            model.train(batch_x, batch_y, batch_len, sess)
            # Calculate batch accuracy and loss.
            acc, loss = model.evaluate(batch_x, batch_y, batch_len, sess)
            print("Iter " + str(1 + step) + " / " + str(epochs) +
                  ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc) + ", Time Remaining= " +
                  etc.format_seconds(pi.time_remaining()), file=sys.stderr)
            if (1 + step) % 10 == 0:
                model.save(sess, 1 + step)
    print("Optimization Finished!", file=sys.stderr)
def sc_extract_embeddings():
    max_length = 30
    num_classes = 200
    embedding_size = len(Vocabulary().restore("data/ds/vocabulary.csv"))
    reader = SentenceReader("data/sc/training.csv", num_classes,
                            embedding_size=embedding_size)
    epochs = num_classes
    # Network Parameters
    num_hidden = 512  # number of units in the hidden layer
    model = SentenceClassifier(max_length, embedding_size, num_hidden,
                               num_classes)
    init = tf.global_variables_initializer()
    cache = EmbeddingCache()
    with tf.Session() as sess:
        sess.run(init)
        model.restore(sess, "pretrained/sc/SentenceClassifier")
        for step, pi in etc.range(epochs):
            # Get all training sentences belonging to the current label.
            batch_x, batch_y, batch_len = reader.query(label=step + 1)
            # Extract hidden activations and average them into one embedding per label.
            activations = model.extract(sess, batch_x, batch_y, batch_len)
            embedding = np.mean(activations, axis=0)
            cache.set(step + 1, embedding)
            print("Iter " + str(1 + step) + " / " + str(epochs) +
                  ", Time Remaining= " +
                  etc.format_seconds(pi.time_remaining()), file=sys.stderr)
    cache.save("data/sc/embeddings.csv")
def lm_train():
    max_length = 30
    dataset = Dataset()
    vocabulary = Vocabulary().restore("data/ds/vocabulary.csv")
    embedding_size = len(vocabulary)
    feature_size = 512 + 1536
    reader = lm.SentenceReader("data/lm/training.csv",
                               embedding_size=embedding_size)
    batch_size = 128
    epochs = 5000
    # Network Parameters
    num_hidden = 512  # number of units in the hidden layer
    model = lm.LanguageModel(max_length=max_length,
                             embedding_size=embedding_size,
                             feature_size=feature_size,
                             num_hidden=num_hidden,
                             num_classes=200)
    init = tf.global_variables_initializer()
    embedding_cache = EmbeddingCache()
    embedding_cache.restore("data/sc/embeddings.csv")
    feature_cache = FeatureCache()
    feature_cache.restore("data/cv/features.csv")
    try:
        os.makedirs("logs")
    except OSError:
        pass
    writer = tf.summary.FileWriter("logs", graph=tf.get_default_graph())
    sc_path = "pretrained/sc/SentenceClassifier"
    with tf.Session() as sess:
        sess.run(init)
        model.sentence_classifier.restore(sess, sc_path)
        print("Restored")
        for step, pi in etc.range(epochs):
            # Get a batch of training instances.
            instances, labels, sentences, lengths = \
                reader.read(lines=batch_size)
            # Look up the cached visual features and prepend the label embedding.
            features = [feature_cache.get(index) for index in instances]
            features = [np.concatenate([embedding_cache.get(label), feature])
                        for label, feature in features]
            # Run optimization op (backprop).
            summary = model.train(sess, sentences, features, labels, lengths)
            writer.add_summary(summary, step)
            # Calculate batch accuracy and loss.
            acc, loss, rel, dis = model.evaluate(sess, sentences, features,
                                                 labels, lengths)
            print("Iter " + str(1 + step) + " / " + str(epochs) +
                  ", Minibatch Loss= " +
                  "{:.6f}".format(loss) +
                  " (rel: {:.6f}, dis: {:.6f})".format(rel, dis) +
                  ", Training Accuracy= " +
                  "{:.5f}".format(acc) + ", Time Remaining= " +
                  etc.format_seconds(pi.time_remaining()), file=sys.stderr)
            # Generate a sample sentence and checkpoint every 10 iterations.
            if (1 + step) % 10 == 0:
                samples = model.generate(sess, features)
                for sentence in samples[0:1]:
                    print(vocabulary.sentence([word for word in sentence]),
                          file=sys.stderr)
                model.save(sess, step)
    print("Optimization Finished!", file=sys.stderr)
def lm_generate():
    max_length = 30
    dataset = Dataset()
    vocabulary = Vocabulary().restore("data/ds/vocabulary.csv")
    embedding_size = len(vocabulary)
    feature_size = 512 + 1536
    reader = lm.SentenceReader("data/lm/training.csv",
                               embedding_size=embedding_size)
    batch_size = 128
    epochs = len(reader) // batch_size
    # Network Parameters
    num_hidden = 512  # number of units in the hidden layer
    model = lm.LanguageModel(max_length=max_length,
                             embedding_size=embedding_size,
                             feature_size=feature_size,
                             num_hidden=num_hidden,
                             num_classes=200)
    init = tf.global_variables_initializer()
    embedding_cache = EmbeddingCache()
    embedding_cache.restore("data/sc/embeddings.csv")
    feature_cache = FeatureCache()
    feature_cache.restore("data/cv/features.csv")
    try:
        os.makedirs("logs")
    except OSError:
        pass
    writer = tf.summary.FileWriter("logs", graph=tf.get_default_graph())
    with tf.Session() as sess:
        sess.run(init)
        model.restore(sess, "pretrained/lm/LanguageModel")
        results = []
        for step, pi in etc.range(epochs):
            # Get a batch of instances to caption.
            instances, labels, sentences, lengths = \
                reader.read(lines=batch_size)
            # Look up the cached visual features; replace the reader's labels
            # with the ones cached by the vision model, and prepend the
            # corresponding label embedding to each feature vector.
            features = [feature_cache.get(index) for index in instances]
            labels = [label for label, feature in features]
            features = [np.concatenate([embedding_cache.get(experiment(label)),
                                        feature])
                        for label, feature in features]
            # Generate a caption for every instance in the batch.
            sentences = model.generate(sess, features)
            print("Iter " + str(1 + step) + " / " + str(epochs) +
                  ", Time Remaining= " +
                  etc.format_seconds(pi.time_remaining()), file=sys.stderr)
            for i, sentence in enumerate(sentences):
                results.append({
                    "image_id": dataset.example(instances[i]).path + ".jpg",
                    "caption": vocabulary.sentence([word for word in sentence],
                                                   limit=True)
                })
            # Periodically flush the generated captions to disk.
            if step % 10 == 0:
                try:
                    os.makedirs("products")
                except OSError:
                    pass
                with open('products/sentences.best.json', 'w') as file:
                    json.dump(results, file)
        # Write the final set of captions, including batches generated after
        # the last periodic flush.
        with open('products/sentences.best.json', 'w') as file:
            json.dump(results, file)
    print("Generation Finished!", file=sys.stderr)
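

# The functions above form a pipeline: train and evaluate the vision model,
# cache its features, train the sentence classifier, cache its per-label
# embeddings, then train the language model and generate captions. Below is a
# minimal sketch of a command-line entry point that runs one stage at a time;
# the step names and the argparse wiring are illustrative assumptions, not
# part of the original tooling.
def main():
    import argparse

    # Each step name maps to one of the stage functions defined above.
    steps = {
        "cv_train": cv_train,
        "cv_evaluate": cv_evaluate,
        "cv_extract_features": cv_extract_features,
        "sc_train": sc_train,
        "sc_evaluate": sc_evaluate,
        "sc_extract_embeddings": sc_extract_embeddings,
        "lm_train": lm_train,
        "lm_generate": lm_generate,
    }
    parser = argparse.ArgumentParser(description="Run one pipeline stage.")
    parser.add_argument("step", choices=sorted(steps))
    args = parser.parse_args()
    steps[args.step]()


if __name__ == "__main__":
    main()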