def gen_y_test(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Load the pre-trained entity and relation vectors and merge them into a
    # single lookup table shared by all vocabularies.
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)
    print('Loading datasets.')

    # Save the gold targets of the test set (y_test) to disk.
    test2 = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test2.load()
    target_list1 = test2.targets1
    # target_list2 = test2.targets2

    path = './results/y_test'
    with open(path, 'w') as f:
        for i in range(len(target_list1)):
            # f.write(str(i) + '\t' + target_list1[i] + '\t' + target_list2[i] + '\n')
            f.write(str(i) + '\t' + target_list1[i] + '\n')
    print('ytest in file')
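# gen_y_test(), testmodel() and the entity/relation main() below all call a
# loadvector() helper that is not shown in this section. A minimal sketch is
# given here, assuming the args.ev / args.rv files use the common text
# embedding layout of one "token v1 v2 ... vn" row per line; the actual file
# format used by the repository may differ.
import numpy as np

def loadvector(path):
    """Read a text embedding file into a {token: np.ndarray} dict (sketch)."""
    vectors = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split()
            if len(parts) < 2:
                continue  # skip blank lines or a "count dim" header row
            vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return vectors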
def _build_dataset(self):
    # Sequence start/end token ids for the decoder.
    self.start_id = start_id(self.output_vocab)
    self.end_id = end_id(self.output_vocab)

    # Use the validation split when running inference, otherwise training data.
    data_file = ("./data/validation.csv" if self.opts.infer
                 else "./data/training.csv")
    data = Data(data_file, self.input_vocab, self.output_vocab)
    data.load()
    transform(data)

    vocab = (self.input_vocab, self.output_vocab)
    self.generator = DataGenerator(data, vocab, self.opts,
                                   self.start_id, self.end_id)

    # Pull one item from the generator to discover the dtypes, shapes and
    # byte size of a batch before handing the generator to tf.data.
    items = next(self.generator)
    output_types = {i: tf.dtypes.as_dtype(items[i].dtype) for i in items}
    output_shapes = {i: tf.TensorShape(items[i].shape) for i in items}
    total_bytes = 0
    for i in items:
        total_bytes += items[i].nbytes

    dataset = tf.data.Dataset.from_generator(self.generator,
                                             output_types=output_types,
                                             output_shapes=output_shapes)

    # Wrap the dataset in an IPU infeed queue; its initializer must be run
    # before the training loop starts consuming batches.
    infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "InfeedQueue",
                                                   replication_factor=1)
    data_init = infeed_queue.initializer

    return dataset, infeed_queue, data_init, vocab
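# _build_dataset() above pulls a single item from the generator purely to
# discover the dtypes and shapes that tf.data.Dataset.from_generator() needs.
# The same pattern in isolation, with a toy generator — every name in this
# sketch is illustrative and not part of the repository:
import numpy as np
import tensorflow as tf

def toy_generator():
    while True:
        yield {'inputs': np.zeros((16, 50), dtype=np.int32),
               'targets': np.zeros((16, 12), dtype=np.int32)}

probe = next(toy_generator())
output_types = {k: tf.dtypes.as_dtype(v.dtype) for k, v in probe.items()}
output_shapes = {k: tf.TensorShape(v.shape) for k, v in probe.items()}
toy_dataset = tf.data.Dataset.from_generator(toy_generator,
                                             output_types=output_types,
                                             output_shapes=output_shapes)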
def testmodel(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Load the pre-trained entity and relation vectors and merge them into a
    # single lookup table shared by all vocabularies.
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)
    print('Loading datasets.')

    test = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test.load()
    test.transform(vector)
    print('Test Datasets Loaded.')

    model = load_model('./savemodel/model1.h5',
                       custom_objects={'AttentionLayer': AttentionLayer})
    print('Model Loaded. Start test.')

    # prediction = model.predict([test.inputs1, test.inputs2, test.inputs3, test.inputs4, test.inputs5])
    prediction = model.predict([test.inputs1, test.inputs2, test.inputs3])

    # ./results/y_pre
    p_prediction1 = list(prediction.flatten())
    # p_prediction2 = list(prediction[1].flatten())
    # num_entity = output_vocab_entity.size()
    num_relation = output_vocab_relation.size()

    # Reshape the flat prediction scores into one row of [index, score] pairs
    # per test example.
    # for m in range(int(len(p_prediction)/num)):
    #     prediction_list.append('')
    prediction_list1 = [[0 for col in range(num_relation)]
                        for row in range(int(len(p_prediction1) / num_relation))]
    # prediction_list2 = [[0 for col in range(num_entity)]
    #                     for row in range(int(len(p_prediction2) / num_entity))]
    for i in range(len(p_prediction1)):
        j = int(i / num_relation)
        k = i % num_relation
        prediction_list1[j][k] = [k, p_prediction1[i]]
    # for i in range(len(p_prediction2)):
    #     j = int(i / num_entity)
    #     k = i % num_entity
    #     prediction_list2[j][k] = [k, p_prediction2[i]]

    # Sort each row by score, map the indices to relation labels and keep the
    # top-5 predictions per example.
    pretarget1 = []
    pretarget2 = []
    for i in range(len(prediction_list1)):
        templist1 = prediction_list1[i]
        templist1.sort(key=takeSecond, reverse=True)
        templist11 = output_vocab_relation.int_to_string(templist1)
        pretarget1.append(templist11[:5])
        pretarget2.append(templist1)

    listinfile(pretarget1, './results/y_pre1')
    listinfile(pretarget2, './results/y_pre2')
    print('ypre1 in file')
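# testmodel() depends on two small helpers, takeSecond() and listinfile(),
# which are defined elsewhere in the repository. Minimal sketches follow; the
# sort key is clear from its use above, while the exact output format of
# listinfile() (tab separator, leading row index) is an assumption modelled on
# gen_y_test().

def takeSecond(element):
    # Sort key: the score in an [index, score] pair.
    return element[1]

def listinfile(rows, path):
    # Write one line per test example: "<row index>\t<row content>".
    with open(path, 'w') as f:
        for i, row in enumerate(rows):
            f.write(str(i) + '\t' + str(row) + '\n')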
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json', padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        kwargs = dict(generator=training.generator(args.batch_size),
                      steps_per_epoch=100,
                      validation_data=validation.generator(args.batch_size),
                      validation_steps=100,
                      callbacks=[cp],
                      workers=1,
                      verbose=1,
                      epochs=args.epochs)
        model.fit_generator(**kwargs)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')
    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
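# Both main() variants pass callbacks=[cp], a checkpoint callback created
# outside the functions shown here. A minimal sketch, assuming a plain Keras
# ModelCheckpoint and a hypothetical ./weights/ output path:
from keras.callbacks import ModelCheckpoint

cp = ModelCheckpoint('./weights/NMT.{epoch:02d}-{val_loss:.2f}.hdf5',
                     monitor='val_loss',
                     save_best_only=True,
                     mode='min')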
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json', padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        model.fit_generator(generator=training.generator(args.batch_size),
                            steps_per_epoch=100,
                            validation_data=validation.generator(args.batch_size),
                            validation_steps=100,
                            callbacks=[cp],
                            workers=1,
                            verbose=1,
                            epochs=args.epochs)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')
    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
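# The compile() calls above also reference a custom all_acc metric defined
# elsewhere in the repository. A minimal sketch, assuming it measures
# full-sequence accuracy (a sample counts only if every output timestep is
# predicted correctly) — the real definition may differ:
import keras.backend as K

def all_acc(y_true, y_pred):
    # 1.0 for a sample only when the argmax prediction matches at every step.
    correct = K.cast(K.equal(K.argmax(y_true, axis=-1),
                             K.argmax(y_pred, axis=-1)),
                     K.floatx())
    return K.mean(K.min(correct, axis=-1))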
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Load the pre-trained entity and relation vectors and merge them into a
    # single lookup table shared by all vocabularies.
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab_entity, output_vocab_relation)
    validation = Data(args.validation_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    training.load()
    validation.load()
    test.load()
    training.transform(vector)
    validation.transform(vector)
    test.transform(vector)
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT2(pad_length=args.padding,
                       n_chars=100,
                       entity_labels=output_vocab_entity.size(),
                       relation_labels=output_vocab_relation.size(),
                       dim=100,
                       embedding_learnable=False,
                       encoder_units=args.units,
                       decoder_units=args.units,
                       trainable=True,
                       return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        hist = model.fit([training.inputs1, training.inputs2, training.inputs3,
                          training.inputs4, training.inputs5],
                         [training.targets1],
                         epochs=args.epochs,
                         batch_size=args.batch_size,
                         validation_split=0.05)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')
    model.save('./savemodel/model1.h5')
    print('Model training complete.')
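# main(), testmodel() and gen_y_test() all read their configuration from an
# argparse namespace. A minimal parser sketch covering every attribute those
# functions access is given below; only the destination attribute names come
# from the code above — the flag spellings, defaults and help strings are
# assumptions.
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ev', help='entity vector file')
    parser.add_argument('--rv', help='relation vector file')
    parser.add_argument('--invocab', help='input vocabulary file')
    parser.add_argument('--evocab', help='entity output vocabulary file')
    parser.add_argument('--revocab', help='relation output vocabulary file')
    parser.add_argument('--training_data', help='training data file')
    parser.add_argument('--validation_data', help='validation data file')
    parser.add_argument('--test_data', help='test data file')
    parser.add_argument('--padding', type=int, default=50, help='sequence pad length')
    parser.add_argument('--units', type=int, default=256, help='encoder/decoder units')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=32)
    return parser.parse_args()

# if __name__ == '__main__':
#     main(parse_args())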