Example #1
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    valid = CoNLLDataset(config.filename_valid, config.processing_word,
                         config.processing_tag, config.max_iter)

    # evaluate and interact
    model.build_submission(valid, config.dir_output + "submission.csv")
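
All of the snippets on this page revolve around a Config object (Example #3 imports it from the sequence_tagging project). As a point of reference, here is a minimal, hypothetical stand-in built only from attribute names that appear in these examples; the paths and defaults are illustrative placeholders, not the project's real values:

# Hypothetical minimal Config stand-in; attribute names are taken from the
# examples on this page, the values are placeholders only.
class Config:
    def __init__(self, load=True):
        self.dir_output = "results/test/"
        self.dir_model = self.dir_output + "model.weights/"
        self.filename_train = "data/train.txt"
        self.filename_valid = "data/valid.txt"
        self.filename_test = "data/test.txt"
        self.max_iter = None  # None means "iterate over the whole dataset"
        if load:
            # a real Config loads the vocabularies here and builds the
            # processing_word / processing_tag callables from them
            self.processing_word = None
            self.processing_tag = None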
Example #2
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, max_iter=config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)
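
interactive_shell itself is not shown on this page. In the sequence_tagging codebase it is a small read-eval-print loop around model.predict; a rough sketch, assuming predict takes a list of raw tokens and returns one tag per token:

# Sketch of an interactive_shell helper, assuming model.predict(words_raw)
# returns one predicted tag per input token.
def interactive_shell(model):
    while True:
        sentence = input("input> ")
        if sentence in ("exit", "quit"):
            break
        words_raw = sentence.strip().split(" ")
        for word, tag in zip(words_raw, model.predict(words_raw)):
            print("{}\t{}".format(word, tag))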
Example #3
    def __init__(self, load_lstm):

        import sys

        if load_lstm:
            sys.path.append('/home/rbshaffer/sequence_tagging')

            from model.ner_model import NERModel
            from model.config import Config
            config = Config()

            # build model
            self.model = NERModel(config)
            self.model.build()
            self.model.restore_session(config.dir_model)
Example #4
 def __init__(self, payment, session=None, proxy=None):
     self.session = session
     self.proxy = proxy if proxy is not None else {}  # avoid a mutable default argument
     self.payment = payment
     self.config = Config()
     self.log = Log()
     techcombank = self.get_techcombank_config()
     self.email_transport = EmailTransport()
     self.login_url = techcombank['login_url']
     self.username = payment.get_username()
     self.password = payment.get_password()
     self.debug_mode = techcombank['debug_mode']
     self.total_transactions = 0
     self.history = History()
     self.code = GenerateCode()
Example #5
def main():
    # create instance of config
    config = Config()

    # build model
    model = QAModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = QADataset(config.filename_test, config.processing_word,
                     config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Example #6
def main(argv=None): 
  # Configurations
  config = Config(gpu='1',
                  root_dir='./data/test/',
                  root_dir_val=None,
                  mode='testing')
  config.BATCH_SIZE = 1

  # Get images and labels.
  dataset_test = Dataset(config, 'test')

  # Train
  _M, _s, _b, _C, _T, _imname = _step(config, dataset_test, False)

  # Add ops to save and restore all the variables.
  saver = tf.train.Saver(max_to_keep=50)
  with tf.Session(config=config.GPU_CONFIG) as sess:
    # Restore the model
    ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
      last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
      print('**********************************************************')
      print('Restore from Epoch '+str(last_epoch))
      print('**********************************************************')
    else:
      init = tf.initializers.global_variables()
      last_epoch = 0
      sess.run(init)
      print('**********************************************************')
      print('Train from scratch.')
      print('**********************************************************')

    step_per_epoch = int(len(dataset_test.name_list) / config.BATCH_SIZE)
    with open(config.LOG_DIR + '/test/score.txt', 'w') as f:
      for step in range(step_per_epoch):
        M, s, b, C, T, imname = sess.run([_M, _s, _b, _C, _T, _imname])
        # save the score
        for i in range(config.BATCH_SIZE):
            _name = imname[i].decode('UTF-8')
            _line = ','.join([_name] + ["{0:.3f}".format(v)
                                        for v in (M[i], s[i], b[i], C[i], T[i])])
            f.write(_line + '\n')
            print(str(step+1)+'/'+str(step_per_epoch)+':'+_line, end='\r')
    print("\n")
Example #7
def main():
    """Procedure to build data

    You MUST RUN this procedure. It iterates over the whole dataset (train,
    dev and test) and extract the vocabularies in terms of words, tags, and
    characters. Having built the vocabularies it writes them in a file. The
    writing of vocabulary in a file assigns an id (the line #) to each word.
    It then extract the relevant GloVe vectors and stores them in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.


    Args:
        config: (instance of Config) has attributes like hyper-params...

    """
    # get config and processing of words
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)  # lowercase all words, map digits to NUM

    # Generators
    dev = CoNLLDataset(config.filename_dev,
                       processing_word)  # generator; each iteration yields a (words, tags) tuple
    test = CoNLLDataset(config.filename_test,
                        processing_word)  # yields one sentence (words) and its tags
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])  # word vocab, tag vocab
    vocab_glove = get_glove_vocab(config.filename_glove)  # GloVe vocab

    vocab = vocab_words & vocab_glove  # & is set intersection (both are sets)
    vocab.add(UNK)
    vocab.add(NUM)  # add the special tokens by hand

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)  # vocab as a dict: {word: index}
    # for this vocab, export a NumPy embedding file containing one matrix of word embeddings
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab (built from the raw file, without lowercasing)
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
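
The GloVe-trimming step the docstring describes is worth spelling out. A sketch of what export_trimmed_glove_vectors typically does (this mirrors the sequence_tagging implementation, assuming the standard space-separated GloVe text format):

import numpy as np

# Keep only the vectors for words in vocab, stored so that row i of the
# matrix corresponds to the word with id i; save as a compressed .npz.
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
    embeddings = np.zeros([len(vocab), dim])
    with open(glove_filename, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(' ')
            word, vector = parts[0], parts[1:]
            if word in vocab:
                embeddings[vocab[word]] = np.asarray(vector, dtype=np.float32)
    np.savez_compressed(trimmed_filename, embeddings=embeddings)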
Example #8
def main():
    # get config and processing of words
    config = Config(load=False)
    # should be source_x.txt, or ontonotes-nw if you like

    config.filename_train = "../datasets/ritter2011/train"
    config.filename_dev = "../datasets/ritter2011/train"
    config.filename_test = "../datasets/ritter2011/train"

    config.filename_chars = config.filename_chars.replace("source", "target")
    config.filename_glove = config.filename_glove.replace("source", "target")
    config.filename_tags = config.filename_tags.replace("source", "target")
    config.filename_words = config.filename_words.replace("source", "target")

    config.dir_model = config.dir_model.replace("source", "target")
    config.dir_output = config.dir_output.replace("source", "target")
    config.path_log = config.path_log.replace("source", "target")

    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = NERDataset(config.filename_dev, processing_word)
    test = NERDataset(config.filename_test, processing_word)
    train = NERDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim Word Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = NERDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
Example #9
def main():
    """Procedure to build data

    You MUST RUN this procedure. It iterates over the whole dataset (train,
    dev and test) and extract the vocabularies in terms of words, tags, and
    characters. Having built the vocabularies it writes them in a file. The
    writing of vocabulary in a file assigns an id (the line #) to each word.
    It then extract the relevant GloVe vectors and stores them in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.


    Args:
        config: (instance of Config) has attributes like hyper-params...

    """
    # get config and processing of words
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)  # lowercase all words, map digits to NUM

    # Generators

    to_be_add = CoNLLDataset1(config.filename_test,
                              processing_word)  # yields one sentence (words) and its tags

    # Build Word and Tag vocab

    vocab_words, _ = get_vocabs([to_be_add])
    vocab_glove = get_glove_vocab(config.filename_glove)  # GloVe vocab

    words_have_vec = vocab_words & vocab_glove

    vocab_words_and_entity = entity2vocab(datasets=[to_be_add],
                                          vocab=words_have_vec)

    vocab_in_file = set(load_vocab(config.filename_words))

    vocab_words_to_be_add = vocab_words_and_entity - vocab_in_file

    if vocab_words_to_be_add:
        with open(config.filename_words, 'a') as f:
            for vocab_word in vocab_words_to_be_add:
                f.write('\n{}'.format(vocab_word))

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)  # vocab as a dict: {word: index}
    # for this vocab, export a NumPy embedding file containing one matrix of word embeddings
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)
Example #10
def main():
    """Procedure to build data

    You MUST RUN this procedure. It iterates over the whole dataset (train,
    dev and test) and extract the vocabularies in terms of words, tags, and
    characters. Having built the vocabularies it writes them in a file. The
    writing of vocabulary in a file assigns an id (the line #) to each word.
    It then extract the relevant GloVe vectors and stores them in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.


    Args:
        config: (instance of Config) has attributes like hyper-params...

    """
    # get config and processing of words
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = CoNLLDataset(config.filename_dev, processing_word=processing_word)
    test = CoNLLDataset(config.filename_test, processing_word=processing_word)
    train = CoNLLDataset(config.filename_train,
                         processing_word=processing_word)

    # Build Word and Tag vocab
    # vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_words, _ = get_vocabs([train, dev])
    _, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words | vocab_glove
    vocab.add(NUM)
    vocab.add(UNK)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
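
Note the design difference from Examples #7 and #8: there the corpus vocabulary is intersected with the GloVe vocabulary (vocab_words & vocab_glove), so corpus words without a pretrained vector fall back to UNK, whereas this variant takes the union (vocab_words | vocab_glove) and keeps every corpus word, leaving the embedding rows of GloVe-less words at their initial value. A toy illustration:

# Two vocabulary-building policies:
corpus = {"obama", "covfefe"}
glove = {"obama", "president"}
print(corpus & glove)  # {'obama'}: 'covfefe' will be mapped to UNK
print(corpus | glove)  # all three words kept; 'covfefe' gets no pretrained vector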
Example #11
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = RippleModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_action, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
Example #12
def main():
    config = Config()

    # build model
    model = TaggerModel(config)
    print(model.idx_to_tag)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)

    model.train(train, dev)
Example #13
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = CoNLLDataset(config.filename_dev, max_iter=config.max_iter)
    train = CoNLLDataset(config.filename_train, max_iter=config.max_iter)

    # train model
    model.train(train, dev)
Example #14
def main():
    # create instance of config
    config = Config()
    if config.use_elmo: config.processing_word = None

    # build model
    model = NERModel(config)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter, config.use_crf)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)

    learn = NERLearner(config, model)
    learn.fit(train, dev)
Example #15
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test,
                        config.processing_word,
                        max_iter=config.max_iter)

    # evaluate and interact
    model.classify(test)
Example #16
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = PreProcessData(config.f_test, config.processing_word,
                          config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)
Example #17
def main():
    # with tf.device("/device:GPU:0"):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = PreProcessData(config.f_dev, config.processing_word,
                         config.processing_tag, config.max_iter)
    train = PreProcessData(config.f_train, config.processing_word,
                           config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #18
def aspectExtractor(sentence):
    # create instance of config
    config = Config()
    # build model
    model = ASPECTModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    preds = interactive_shell(model, sentence)
    return preds
Example #19
def load(file):
    with open(file) as f:
        data = json.load(f)

    channelIds = set(x["channel"] for x in data["channels"])
    detectorIds = set(x["detector"] for x in data["channels"])

    channelId = IntEnum("ChannelId", list(channelIds))
    detectorId = IntEnum("DetectorId", list(detectorIds))
    channels = {
        c["id"]: Channel(channelId[c["channel"]], detectorId[c["detector"]])
        for c in data["channels"]
    }

    return Config(channelId, detectorId, channels, data["properties"],
                  data["particleTimes"])
Example #20
def main():
    # create instance of config
    config = Config()

    # build model
    model = Model(config)
    model.build()
    model.restore_session(
        "results/file_name/model.weights/")  # optional, restore weights

    # create datasets
    train, dev = getDataSet('data/train/pos_sample.txt',
                            'data/train/neg_sample.txt')

    # evaluate model
    model.evaluate(dev)
Example #21
def main():
    config = Config()
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag)
    max_sequence_length = max(max(len(seq[0]) for seq in train),
                              max(len(seq[0]) for seq in dev),
                              max(len(seq[0]) for seq in test))
    max_word_length = max(max(len(word[0]) for seq in train for word in seq[0]),
                          max(len(word[0]) for seq in test for word in seq[0]),
                          max(len(word[0]) for seq in dev for word in seq[0]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
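
The double indexing in the max-length computation assumes each dataset item is a (words, tags) pair and, with character features enabled, each word is itself a (char_ids, word_id) pair; len(seq[0]) is then the sentence length in words and len(word[0]) the word length in characters. Schematically (the ids are made up):

# Shape assumed by the max-length computation above:
seq = ([([3, 1, 4], 17),   # "the" -> (char ids, word id)
        ([2, 7], 4)],      # "is"
       ["O", "O"])         # tags
words, tags = seq
print(len(words))        # sentence length in words: 2
print(len(words[0][0]))  # first word's length in characters: 3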
Example #22
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)  # "results/test/model.weights/"

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)  # test from the command line
Example #23
def main():
    # create instance of config
    config = Config(operation='evaluate')

    # build model
    model = REGShell(config)
    model.restore_model(
        'results/train/20180905_112821/model/checkpoint.pth.tar')

    # create datasets
    test = REGDataset(config.filename_test,
                      config=config,
                      max_iter=config.max_iter)

    # evaluate on test set
    model.evaluate(test)
Example #24
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
Example #25
def init():
    """
    This function loads the pre-trained TensorFlow model.
    :return: None
    """
    # load the pre-trained tensorflow model
    global model, graph

    # create instance of config
    config = Config()

    # build model
    model = ASPECTModel(config)
    model.build()
    model.restore_session(config.dir_model)
    graph = tf.get_default_graph()
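
Capturing tf.get_default_graph() at load time is the usual TF1 pattern for serving a model from another thread (e.g. a web request handler), where the thread's default graph is not the one the model was built in. A hedged sketch of a handler built on the globals above (predict_one is a hypothetical helper, and it assumes the model exposes a predict method like the NER examples):

# Hypothetical request handler using the globals set up in init();
# graph.as_default() re-enters the graph the model was built in.
def predict_one(words_raw):
    with graph.as_default():
        return model.predict(words_raw)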
Example #26
def main():
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session("./results/server_test/model.weights/")

    # create dataset
    test = CoNLLDataset("../movie-dialogs/benchmark_test.txt",
                        config.processing_word, config.processing_tag,
                        config.max_iter)

    # evaluate and interact
    predictions, correct_predictions = model.predict_all(test)
Example #27
def main():
    '''
        evaluate using saved models
    '''
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)
    print(type(config.vocab_words))
    # evaluate and interact
    # model.evaluate(test)
    lab = []   # predicted tag sequences, one list per sentence
    seqs = []  # decoded token sequences, one list per sentence
    for words, labels in minibatches(test, 1):
        temp = []   # tags for this sentence
        temp2 = []  # decoded words for this sentence
        w = copy.deepcopy(words)
        A = list(w[0])

        labels_pred, sequence_lengths = model.predict_batch(words)
        # fd, sequence_lengths = model.get_feed_dict(words, dropout=1.0)

        for i, y in enumerate(labels_pred[0]):
            x = A[0][i]  # char ids of the i-th word
            temp3 = []
            for letter in x:
                # decode each char id back to its character
                temp3.append(model.idx_to_char[letter])
            temp.append(model.idx_to_tag[y])
            temp2.append(''.join(temp3))
            # temp2.append(model.config.processing_word[x])
        lab.append(temp)
        seqs.append(temp2)
    print(lab[0:3])
    print(seqs[0:3])
    # interactive_shell(model)
    name = 'pred_give_ingredient_dev.txt'
    data = load(config.filename_test)
    print(data[0:3])
    write_pred(data, lab, name)
Example #28
 def __init__(self):
     super(base_login, self).__init__()
     self.setupUi(self)
     self.config_object = Config()
     self.message_box = Messagebox()
     self.btn_server.clicked.connect(lambda: self.login_into_server(
         self.username_textbox.text(), self.password_textbox.text()))
     self.btn_client.clicked.connect(lambda: self.login_into_client(
         self.username_textbox.text(), self.password_textbox.text()))
     self.btn_new_reg.clicked.connect(lambda: self.new_user_reg(
         self.new_username_textbox.text(), self.new_password_textbox.text(),
         self.new_confirmed_passw_textbox.text()))
     self.btn_login_cancel.clicked.connect(self.close_main_window)
     self.btn_reg_cancel.clicked.connect(self.close_main_window)
     self.configuration_start()
     self.db_path = self.config_object.get_db_path()
     self.sql = SqliteController()
Example #29
def main():
    # create instance of config; here Config also does the data loading:
    # it holds the vocab, the pretrained GloVe embedding matrix, and the str -> id functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 60
    config.lr_method = "adam"
    config.lr = 0.0005
    config.lr_decay = 1.0
    config.clip = -2.0  # if negative, no clipping
    config.nepoch_no_imprv = 8

    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session("results/test/model.weights/",
                          indicate="fine_tuning")

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    # wrap datasets for the classifier
    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag,
                                    context_length=config.context_length)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag,
                                  context_length=config.context_length)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag,
                                   context_length=config.context_length)

    # train model
    model.train(train4cl, dev4cl, test4cl)
Example #30
def main(argv=None):
    # Configurations
    config = Config()
    config.DATA_DIR = ['/data/']
    config.LOG_DIR = './log/model'
    config.MODE = 'training'
    config.STEPS_PER_EPOCH_VAL = 180
    config.display()

    # Get images and labels.
    dataset_train = Dataset(config, 'train')
    # Build a Graph
    model = Model(config)

    # Train the model
    model.compile()
    model.train(dataset_train, None)