def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    valid = CoNLLDataset(config.filename_valid, config.processing_word,
                         config.processing_tag, config.max_iter)

    # build the submission file
    model.build_submission(valid, config.dir_output + "submission.csv")
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, max_iter=config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)
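# For reference, a minimal sketch of what an interactive_shell helper like the
# one used above typically does (hypothetical re-implementation, not the real
# helper; it assumes the model exposes a predict(words_raw) method that
# returns one tag per token).
def interactive_shell_sketch(model):
    while True:
        sentence = input("input> ")
        if sentence.strip() in ("exit", "quit"):
            break
        words_raw = sentence.strip().split(" ")
        preds = model.predict(words_raw)  # assumed API
        print(list(zip(words_raw, preds)))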
def __init__(self, load_lstm):
    import sys

    if load_lstm:
        sys.path.append('/home/rbshaffer/sequence_tagging')

        from model.ner_model import NERModel
        from model.config import Config

        config = Config()

        # build model
        self.model = NERModel(config)
        self.model.build()
        self.model.restore_session(config.dir_model)
def __init__(self, payment, session=None, proxy=None):
    self.session = session
    # avoid a mutable default argument: fall back to a fresh dict per instance
    self.proxy = proxy if proxy is not None else {}
    self.payment = payment
    self.config = Config()
    self.log = Log()
    techcombank = self.get_techcombank_config()
    self.email_transport = EmailTransport()
    self.login_url = techcombank['login_url']
    self.username = payment.get_username()
    self.password = payment.get_password()
    self.debug_mode = techcombank['debug_mode']
    self.total_transactions = 0
    self.history = History()
    self.code = GenerateCode()
def main():
    # create instance of config
    config = Config()

    # build model
    model = QAModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = QADataset(config.filename_test, config.processing_word,
                     config.processing_tag, config.max_iter)

    # evaluate
    model.evaluate(test)
def main(argv=None):
    # Configurations
    config = Config(gpu='1', root_dir='./data/test/', root_dir_val=None,
                    mode='testing')
    config.BATCH_SIZE = 1

    # Get images and labels.
    dataset_test = Dataset(config, 'test')

    # Build the test-time ops
    _M, _s, _b, _C, _T, _imname = _step(config, dataset_test, False)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=50)

    with tf.Session(config=config.GPU_CONFIG) as sess:
        # Restore the model
        ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('**********************************************************')
            print('Restore from Epoch ' + str(last_epoch))
            print('**********************************************************')
        else:
            init = tf.initializers.global_variables()
            last_epoch = 0
            sess.run(init)
            print('**********************************************************')
            print('Train from scratch.')
            print('**********************************************************')

        step_per_epoch = int(len(dataset_test.name_list) / config.BATCH_SIZE)
        with open(config.LOG_DIR + '/test/score.txt', 'w') as f:
            for step in range(step_per_epoch):
                M, s, b, C, T, imname = sess.run([_M, _s, _b, _C, _T, _imname])
                # save the score
                for i in range(config.BATCH_SIZE):
                    _name = imname[i].decode('UTF-8')
                    _line = (_name + ','
                             + "{0:.3f}".format(M[i]) + ','
                             + "{0:.3f}".format(s[i]) + ','
                             + "{0:.3f}".format(b[i]) + ','
                             + "{0:.3f}".format(C[i]) + ','
                             + "{0:.3f}".format(T[i]))
                    f.write(_line + '\n')
                    print(str(step + 1) + '/' + str(step_per_epoch) + ':' + _line,
                          end='\r')
        print("\n")
def main(): """Procedure to build data You MUST RUN this procedure. It iterates over the whole dataset (train, dev and test) and extract the vocabularies in terms of words, tags, and characters. Having built the vocabularies it writes them in a file. The writing of vocabulary in a file assigns an id (the line #) to each word. It then extract the relevant GloVe vectors and stores them in a np array such that the i-th entry corresponds to the i-th word in the vocabulary. Args: config: (instance of Config) has attributes like hyper-params... """ # get config and processing of words config = Config(load=False) processing_word = get_processing_word(lowercase=True) # 把字符全部小写,数字替换成NUM # Generators dev = CoNLLDataset(config.filename_dev, processing_word) # 创建一个生成器对象,每一次迭代产生tuple (words,tags) test = CoNLLDataset(config.filename_test, processing_word) # 返回一句话(words),和标签tags train = CoNLLDataset(config.filename_train, processing_word) # Build Word and Tag vocab vocab_words, vocab_tags = get_vocabs([train, dev, test]) # word词表, tags表 vocab_glove = get_glove_vocab(config.filename_glove) # glove词表 vocab = vocab_words & vocab_glove # & 求交集 set,都是集合 vocab.add(UNK) vocab.add(NUM) # 手动添加 # Save vocab write_vocab(vocab, config.filename_words) write_vocab(vocab_tags, config.filename_tags) # Trim GloVe Vectors vocab = load_vocab(config.filename_words) # 得到dict类型的vocab:{word:index} # 针对vocab,生成numpy的embedding文件,包含一个矩阵,对应词嵌入 export_trimmed_glove_vectors(vocab, config.filename_glove, config.filename_trimmed, config.dim_word) # Build and save char vocab 生成字母表, 这里没用到小写化的东西。只有文件本身。 train = CoNLLDataset(config.filename_train) vocab_chars = get_char_vocab(train) write_vocab(vocab_chars, config.filename_chars)
def main():
    # get config and processing of words
    config = Config(load=False)

    # should be source_x.txt, or ontonotes-nw if you like
    config.filename_train = "../datasets/ritter2011/train"
    config.filename_dev = "../datasets/ritter2011/train"
    config.filename_test = "../datasets/ritter2011/train"

    config.filename_chars = config.filename_chars.replace("source", "target")
    config.filename_glove = config.filename_glove.replace("source", "target")
    config.filename_tags = config.filename_tags.replace("source", "target")
    config.filename_words = config.filename_words.replace("source", "target")
    config.dir_model = config.dir_model.replace("source", "target")
    config.dir_output = config.dir_output.replace("source", "target")
    config.path_log = config.path_log.replace("source", "target")

    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = NERDataset(config.filename_dev, processing_word)
    test = NERDataset(config.filename_test, processing_word)
    train = NERDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim Word Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = NERDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
def main(): """Procedure to build data You MUST RUN this procedure. It iterates over the whole dataset (train, dev and test) and extract the vocabularies in terms of words, tags, and characters. Having built the vocabularies it writes them in a file. The writing of vocabulary in a file assigns an id (the line #) to each word. It then extract the relevant GloVe vectors and stores them in a np array such that the i-th entry corresponds to the i-th word in the vocabulary. Args: config: (instance of Config) has attributes like hyper-params... """ # get config and processing of words config = Config(load=False) processing_word = get_processing_word(lowercase=True) # 把字符全部小写,数字替换成NUM # Generators to_be_add = CoNLLDataset1(config.filename_test, processing_word) # 返回一句话(words),和标签tags # Build Word and Tag vocab vocab_words, _ = get_vocabs([to_be_add]) vocab_glove = get_glove_vocab(config.filename_glove) # glove词表 words_have_vec = vocab_words & vocab_glove vocab_words_and_entity = entity2vocab(datasets=[to_be_add], vocab=words_have_vec) vocab_in_file = set(load_vocab(config.filename_words)) vocab_words_to_be_add = vocab_words_and_entity - vocab_in_file if len(vocab_words_to_be_add) != 0: with open(config.filename_words, 'a') as f: for i, vocab_word in enumerate(vocab_words_to_be_add): f.write('\n{}'.format(vocab_word)) # Trim GloVe Vectors vocab = load_vocab(config.filename_words) # 得到dict类型的vocab:{word:index} # 针对vocab,生成numpy的embedding文件,包含一个矩阵,对应词嵌入 export_trimmed_glove_vectors(vocab, config.filename_glove, config.filename_trimmed, config.dim_word)
def main(): """Procedure to build data You MUST RUN this procedure. It iterates over the whole dataset (train, dev and test) and extract the vocabularies in terms of words, tags, and characters. Having built the vocabularies it writes them in a file. The writing of vocabulary in a file assigns an id (the line #) to each word. It then extract the relevant GloVe vectors and stores them in a np array such that the i-th entry corresponds to the i-th word in the vocabulary. Args: config: (instance of Config) has attributes like hyper-params... """ # get config and processing of words config = Config(load=False) processing_word = get_processing_word(lowercase=True) # Generators dev = CoNLLDataset(config.filename_dev, processing_word=processing_word) test = CoNLLDataset(config.filename_test, processing_word=processing_word) train = CoNLLDataset(config.filename_train, processing_word=processing_word) # Build Word and Tag vocab # vocab_words, vocab_tags = get_vocabs([train, dev, test]) vocab_words, _ = get_vocabs([train, dev]) _, vocab_tags = get_vocabs([train, dev, test]) vocab_glove = get_glove_vocab(config.filename_glove) vocab = vocab_words | vocab_glove vocab.add(NUM) vocab.add(UNK) # Save vocab write_vocab(vocab, config.filename_words) write_vocab(vocab_tags, config.filename_tags) # Trim GloVe Vectors vocab = load_vocab(config.filename_words) export_trimmed_glove_vectors(vocab, config.filename_glove, config.filename_trimmed, config.dim_word) # Build and save char vocab train = CoNLLDataset(config.filename_train) vocab_chars = get_char_vocab(train) write_vocab(vocab_chars, config.filename_chars)
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = RippleModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_action, config.max_iter)

    # evaluate
    model.evaluate(test)
def main():
    config = Config()

    # build model
    model = TaggerModel(config)
    print(model.idx_to_tag)
    model.build()
    # model.restore_session("results/crf/model.weights/")  # optional, restore weights
    # model.reinitialize_weights("proj")

    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)

    model.train(train, dev)
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/")  # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, max_iter=config.max_iter)
    train = CoNLLDataset(config.filename_train, max_iter=config.max_iter)

    # train model
    model.train(train, dev)
def main():
    # create instance of config
    config = Config()
    if config.use_elmo:
        config.processing_word = None

    # build model
    model = NERModel(config)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter, config.use_crf)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)

    learn = NERLearner(config, model)
    learn.fit(train, dev)
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        max_iter=config.max_iter)

    # classify
    model.classify(test)
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = PreProcessData(config.f_test, config.processing_word,
                          config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)
def main(): # with tf.device("/device:GPU:0"): # create instance of config config = Config() # build model model = NERModel(config) model.build() # model.restore_session("results/crf/model.weights/") # optional, restore weights # model.reinitialize_weights("proj")s # create datasets dev = PreProcessData(config.f_dev,config.processing_word, config.processing_tag, config.max_iter) train = PreProcessData(config.f_train, config.processing_word, config.processing_tag, config.max_iter) # train model model.train(train, dev)
def aspectExtractor(sentence):
    # create instance of config
    config = Config()

    # build model
    model = ASPECTModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    preds = interactive_shell(model, sentence)
    return preds
def load(file):
    with open(file) as f:
        data = json.load(f)

    channelIds = set(x["channel"] for x in data["channels"])
    detectorIds = set(x["detector"] for x in data["channels"])
    channelId = IntEnum("ChannelId", list(channelIds))
    detectorId = IntEnum("DetectorId", list(detectorIds))
    channels = {
        c["id"]: Channel(channelId[c["channel"]], detectorId[c["detector"]])
        for c in data["channels"]
    }
    return Config(channelId, detectorId, channels, data["properties"],
                  data["particleTimes"])
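# A hedged illustration of the JSON shape load() appears to expect, inferred
# from the keys accessed above; the field names "channels", "properties",
# and "particleTimes" come from the code, while the values here are invented.
EXAMPLE_CONFIG_JSON = """{
  "channels": [
    {"id": 0, "channel": "ch_a", "detector": "det_1"},
    {"id": 1, "channel": "ch_b", "detector": "det_1"}
  ],
  "properties": {},
  "particleTimes": []
}"""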
def main():
    # create instance of config
    config = Config()

    # build model
    model = Model(config)
    model.build()
    model.restore_session("results/file_name/model.weights/")  # optional, restore weights

    # create datasets
    train, dev = getDataSet('data/train/pos_sample.txt',
                            'data/train/neg_sample.txt')

    # evaluate model
    model.evaluate(dev)
def main():
    config = Config()

    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag)

    # each seq is (words, tags) and each word is (char_ids, word_id), so
    # len(seq[0]) is the sentence length and len(word[0]) the word length
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))
    max_word_length = max(max([len(word[0]) for seq in train for word in seq[0]]),
                          max([len(word[0]) for seq in test for word in seq[0]]),
                          max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)

    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
def main(): # create instance of config config = Config() # build model model = NERModel(config) model.build() model.restore_session(config.dir_model) # "results/test/model.weights/" # create dataset test = CoNLLDataset(config.filename_test, config.processing_word, config.processing_tag, config.max_iter) # evaluate and interact model.evaluate(test) interactive_shell(model) # test in commend lines
def main():
    # create instance of config
    config = Config(operation='evaluate')

    # build model
    model = REGShell(config)
    model.restore_model('results/train/20180905_112821/model/checkpoint.pth.tar')

    # create datasets
    test = REGDataset(config.filename_test, config=config,
                      max_iter=config.max_iter)

    # evaluate on test set
    model.evaluate(test)
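# Hypothetical sketch of what a restore_model like the one above often does
# for a .pth.tar checkpoint (an assumption for illustration; REGShell's real
# method may differ, including the 'state_dict' key).
import torch

def restore_model_sketch(module, checkpoint_path):
    # load onto CPU first so restoring works on machines without a GPU
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # assumes the checkpoint dict stores the weights under 'state_dict'
    module.load_state_dict(checkpoint['state_dict'])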
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
def init():
    """Load the pre-trained TensorFlow model.

    :return: None
    """
    global model, graph

    # create instance of config
    config = Config()

    # build model
    model = ASPECTModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # keep a handle on the graph so other threads can run predictions in it
    graph = tf.get_default_graph()
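# A common companion to the init() above in TF1 serving code (an assumption,
# not shown in the source): request handlers run on other threads, so a
# prediction must re-enter the captured graph. Hypothetical handler; it
# assumes the model exposes a predict(words) method.
def predict_sketch(words):
    with graph.as_default():
        return model.predict(words)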
def main(): # create instance of config config = Config() # build model model = NERModel(config) model.build() model.restore_session("./results/server_test/model.weights/") # create dataset test = CoNLLDataset("../movie-dialogs/benchmark_test.txt", config.processing_word, config.processing_tag, config.max_iter) # evaluate and interact predictions, correct_predictions = model.predict_all(test)
def main():
    '''evaluate using saved models'''
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    print(type(config.vocab_words))

    # evaluate and interact
    # model.evaluate(test)
    lab = []
    seqs = []
    for words, labels in minibatches(test, 1):
        temp = []
        temp2 = []
        w = copy.deepcopy(words)
        A = list(w[0])
        labels_pred, sequence_lengths = model.predict_batch(words)
        # fd, sequence_lengths = model.get_feed_dict(words, dropout=1.0)
        for i, y in enumerate(labels_pred[0]):
            x = A[0][i]  # char ids of the i-th word
            temp3 = []
            for letter in x:
                temp3.append(model.idx_to_char[letter])  # char id -> character
            temp.append(model.idx_to_tag[y])  # tag id -> tag string
            temp2.append(''.join(temp3))  # reassemble the word from its characters
            # temp2.append(model.config.processing_word[x])
        lab.append(temp)
        seqs.append(temp2)

    print(lab[0:3])
    print(seqs[0:3])
    # interactive_shell(model)

    name = 'pred_give_ingredient_dev.txt'
    data = load(config.filename_test)
    print(data[0:3])
    write_pred(data, lab, name)
def __init__(self):
    super(base_login, self).__init__()
    self.setupUi(self)
    self.config_object = Config()
    self.message_box = Messagebox()

    self.btn_server.clicked.connect(lambda: self.login_into_server(
        self.username_textbox.text(), self.password_textbox.text()))
    self.btn_client.clicked.connect(lambda: self.login_into_client(
        self.username_textbox.text(), self.password_textbox.text()))
    self.btn_new_reg.clicked.connect(lambda: self.new_user_reg(
        self.new_username_textbox.text(), self.new_password_textbox.text(),
        self.new_confirmed_passw_textbox.text()))
    self.btn_login_cancel.clicked.connect(self.close_main_window)
    self.btn_reg_cancel.clicked.connect(self.close_main_window)

    self.configuration_start()
    self.db_path = self.config_object.get_db_path()
    self.sql = SqliteController()
def main():
    # create instance of config; the config also takes care of loading the
    # data: it holds the vocab, the pretrained GloVe embedding matrix, and
    # the str -> id processing functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 60
    config.lr_method = "adam"
    config.lr = 0.0005
    config.lr_decay = 1.0
    config.clip = -2.0  # if negative, no clipping
    config.nepoch_no_imprv = 8
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session("results/test/model.weights/", indicate="fine_tuning")
    # model.restore_session("results/crf/model.weights/")  # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets: each word is [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    # train model
    train4cl = CoNLLdata4classifier(train, processing_word=config.processing_word,
                                    processing_tag=config.processing_tag,
                                    context_length=config.context_length)
    dev4cl = CoNLLdata4classifier(dev, processing_word=config.processing_word,
                                  processing_tag=config.processing_tag,
                                  context_length=config.context_length)
    test4cl = CoNLLdata4classifier(test, processing_word=config.processing_word,
                                   processing_tag=config.processing_tag,
                                   context_length=config.context_length)
    model.train(train4cl, dev4cl, test4cl)
def main(argv=None):
    # Configurations
    config = Config()
    config.DATA_DIR = ['/data/']
    config.LOG_DIR = './log/model'
    config.MODE = 'training'
    config.STEPS_PER_EPOCH_VAL = 180
    config.display()

    # Get images and labels.
    dataset_train = Dataset(config, 'train')

    # Build a Graph
    model = Model(config)

    # Train the model
    model.compile()
    model.train(dataset_train, None)