def sample(args):
    print('Loading data')
    positive_data_file = "./data/pos.txt"
    negative_data_file = "./data/neg.txt"
    x, y, vocabulary, vocabulary_inv = utils.load_data(positive_data_file,
                                                       negative_data_file)

    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)

    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            predicted_result = sess.run(predictions, {input_x: raw_x,
                                                      dropout_keep_prob: 1.0})

            if predicted_result[0] == 0:
                print(args.text + ": negative")
            else:
                print(args.text + ": positive")
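# Hedged usage sketch (not from the source): sample() only reads the `text` and
# `checkpoint_dir` attributes of its argument, so an argparse.Namespace is enough
# to drive it; the example text and checkpoint path below are assumptions.
if __name__ == "__main__":
    import argparse
    demo_args = argparse.Namespace(text="a surprisingly good movie",
                                   checkpoint_dir="./runs/checkpoints")
    sample(demo_args)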
def padding(data, max_len):
    """
    Pad sentences to maximal length
    @data: Sentence pairs
    @max_len: Maximal length of the sentences in all three sets
    @return: Padded sentence pairs
    """
    padded_data = {}
    for s in data.keys():
        padded_data[s] = pad_sentences(data[s], max_len)
    return padded_data
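# Hedged usage sketch (assumption, not from the source): the helper name
# `pad_all_splits` and the split names are illustrative only. The three splits
# are gathered into a dict and padded to the longest sentence across all of them,
# which matches the "all three sets" wording in the docstring above.
def pad_all_splits(train_sents, dev_sents, test_sents):
    data = {"train": train_sents, "dev": dev_sents, "test": test_sents}
    max_len = max(len(sent) for split in data.values() for sent in split)
    return padding(data, max_len)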
def run_model(args, graph, sess, x, y, vocabulary, text):
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)

    # Load the saved meta graph and restore variables
    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)

    # Get the placeholders from the graph by name
    input_x = graph.get_operation_by_name("input_x").outputs[0]
    dropout_keep_prob = graph.get_operation_by_name(
        "dropout_keep_prob").outputs[0]

    predictions = graph.get_operation_by_name("output/predictions").outputs[0]
    predicted_result = sess.run(predictions, {
        input_x: raw_x,
        dropout_keep_prob: 1.0
    })
    return predicted_result
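# Hedged sketch (assumption, mirroring the graph/session setup shown in sample()
# above): run_model() expects the caller to own the TensorFlow graph and session,
# so a thin wrapper like this hypothetical `predict_text` can supply them.
def predict_text(args, x, y, vocabulary, text):
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            return run_model(args, graph, sess, x, y, vocabulary, text)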
mode = sys.argv[1]
if mode != "train" and mode != "test":
    print("Invalid Mode!")
    exit()

print("Reading data from Corpus...")
train_reviews, test_reviews, train_labels, test_labels = read_data(path)

vocab = Vocab()
print("Building Vocab...")
vocab.build(train_reviews + test_reviews)
VOCAB_SIZE = len(vocab.word2idx)

padded_train = pad_sentences(train_reviews)
padded_test = pad_sentences(test_reviews)
int_train = word_to_int(padded_train, vocab.word2idx)
int_test = word_to_int(padded_test, vocab.word2idx)

x_train = Variable(torch.LongTensor(int_train).to(DEVICE))
x_test = Variable(torch.LongTensor(int_test).to(DEVICE))
y_train = Variable(torch.LongTensor(train_labels).to(DEVICE))
y_test = Variable(torch.LongTensor(test_labels).to(DEVICE))

print("Instantiating the Model...")
model = Classifier(VOCAB_SIZE, EMBED_DIM, HIDDEN_DIM, NUM_OUTPUTS, NUM_LAYERS)
print(model)
def do_pass(batches, counters, shot, way, query, expressions, train, test,
            id_to_token=None, id_to_tag=None, test_cls=None):
    model, optimizer = expressions
    llog, alog = Averager(), Averager()

    if test:
        output_file = open("./output.txt" + str(test_cls), 'w')

    for i, (batch, counter) in enumerate(zip(batches, counters), 1):
        data_token = [x for _, x, _, _ in batch]
        data_sentence = [sent for sent, _, _, _ in batch]
        data_label = [label for _, _, label, _ in batch]

        # Split the batch into the support (shot) and query parts
        p = shot * way
        data_token_shot, data_token_query = data_token[:p], data_token[p:]
        data_sentence_shot, data_sentence_query = data_sentence[:p], data_sentence[p:]
        counter_token, counter_query = counter[:p], counter[p:]

        (data_sentence_shot, sentence_shot_lens), (data_sentence_query, query_shot_lens) = \
            pad_sentences(data_sentence_shot, MAX_SENT_LEN), \
            pad_sentences(data_sentence_query, MAX_SENT_LEN)

        # Class prototypes: mean of the support embeddings for each class
        proto = model(data_sentence_shot, data_token_shot, sentence_shot_lens)
        proto = proto.reshape(shot, way, -1).mean(dim=0)

        # At evaluation time the number of query examples per class is not fixed,
        # so recompute it from the batch size
        if not train:
            query = int((len(data_token) - p) / way)

        label = torch.arange(way).repeat(query)
        label = label.type(torch.LongTensor).to(device)

        logits = euclidean_metric(
            model(data_sentence_query, data_token_query, query_shot_lens), proto)
        logits[:, 0] = model.return_0class()

        loss = F.cross_entropy(logits, label)
        acc = count_acc(logits, label, counter_query)
        llog.add(loss.item())
        alog.add(acc)

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if test:
            # Print the outputs to a file
            save_dev_output(output_file, logits, label, data_label,
                            data_sentence_query, data_token_query,
                            query_shot_lens, id_to_token, id_to_tag)

    if test:
        output_file.close()

    return llog, alog
import calendar
from os.path import isfile

import pandas as pd

from config import max_count
from utils import get_all_lines, pad_sentences, build_vocab

preprocessed_events_description = "data/barclays_events_description_preprocessed.csv"
assert isfile(preprocessed_events_description)
assert isfile("data/barclays_events.csv")

months = list(calendar.month_abbr)
df = pd.read_csv("data/barclays_events.csv", sep=", ")

preprocessed_descriptions = get_all_lines(preprocessed_events_description)
padded_description = pad_sentences(preprocessed_descriptions)
vocabulary, vocabulary_inv, word_counts = build_vocab(padded_description)
print("Length of vocab is: {}".format(len(vocabulary)))


def get_encoded_sentence(sentence):
    # Encode a sentence as a fixed-length list of vocabulary indices,
    # padding with 0 and mapping unknown words to 0
    padded_sentence = [0] * max_count
    words = sentence.split(" ")
    for i in range(min(max_count, len(words))):
        padded_sentence[i] = vocabulary.get(words[i].strip(), 0)
    return padded_sentence


def get_all_events():
    events_info = {}
    with open(preprocessed_events_description) as f:
        descriptions = f.readlines()[1:]