def main():
    # create instance of Config; it also loads the data:
    # the vocabulary, the pretrained GloVe embeddings matrix, and the str->id mapping functions
    config = Config()
    config.nepochs = 200
    config.dropout = 0.5
    config.batch_size = 40
    config.lr_method = "adam"
    config.lr = 0.0007
    config.lr_decay = 0.97
    config.clip = -5.0  # if negative, no clipping
    config.nepoch_no_imprv = 20

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)
def predict(weights_path, dataset, abstract_sentences):
    """Restore a trained HANNModel from weights_path and predict a label for each sentence."""
    parser = argparse.ArgumentParser()
    config = Config(parser, log_config=False, dataset=dataset)

    # restore model weights
    model = HANNModel(config)
    model.build()
    model.restore_session(weights_path)

    sentences_words = []
    # map each sentence of the abstract to a list of word ids
    for line in abstract_sentences:
        # split the line into words and map each word to its id
        sentence = [config.processing_word(word) for word in line.split()]
        sentences_words += [sentence]

    # run prediction
    labels_pred, _ = model.predict_batch([sentences_words])

    # map: label id to label string
    tag_id_to_label = dict((v, k) for k, v in config.vocab_tags.items())

    # convert predicted labels to string
    labels_pred_str = []
    for sublist in labels_pred:
        for item in sublist:
            labels_pred_str.append(tag_id_to_label[item])

    return labels_pred_str
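# A minimal usage sketch for predict(); the weights path, dataset name and the sentences
# below are hypothetical placeholders, not values taken from the original project:
#     abstract = ["OBJECTIVE To assess the effect of treatment X .",
#                 "METHODS We ran a randomised controlled trial ."]
#     labels = predict("results/model.weights/", "pubmed-20k", abstract)
#     for sentence, label in zip(abstract, labels):
#         print(label, sentence)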
# Example 3
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    ########## comment this block out if training the model from scratch ##########
    config.restore = True
    if config.restore:
        model.restore_session(
            "/home/lena/Dokumente/Master/dissertation/Data/output/model.weights"
        )  # optional, restore weights
    model.reinitialize_weights("proj")  # reinitialise the weights in the "proj" scope
    ################################################################################

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.evaluate(test)
# Example 4
def predict():
    config = Config()
    threshold = (config.sequence_length / 2) + 1
    config.batch_size = 1
    model = BertModel(config)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    ckpt_dir = config.ckpt_dir
    print("ckpt_dir:", ckpt_dir)
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        for i in range(100):
            # 2.feed data
            input_x = np.random.randn(
                config.batch_size,
                config.sequence_length)  # [None, self.sequence_length]
            input_x[input_x >= 0] = 1
            input_x[input_x < 0] = 0
            target_label = generate_label(input_x, threshold)
            input_sum = np.sum(input_x)
            # 3. run the session to get predictions and print some logs
            logit, prediction = sess.run(
                [model.logits, model.predictions],
                feed_dict={
                    model.input_x: input_x,
                    model.dropout_keep_prob: config.dropout_keep_prob
                })
            print("target_label:", target_label, ";input_sum:", input_sum,
                  "threshold:", threshold, ";prediction:", prediction)
            print("input_x:", input_x, ";logit:", logit)
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    processing_word = get_processing_word(lowercase=True)

    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, processing_word)

        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, processing_word)

        else:
            raise ValueError("expected 'test' or 'dev', got {!r}".format(sys.argv[1]))

    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, processing_word)

    test4cl = CoNLLdata4classifier(test, processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # evaluate and interact
    model.evaluate(test4cl)
def main(argv=None):
    # Configurations
    config = Config()
    config.DATA_DIR = [
        './data/SiW_M_Makeup_Ob_Binary_Files',
        './data/SiW_M_Mask_Silicone_Binary_Files',
        './data/SiW_M_Makeup_Co_Binary_Files',
        './data/SiW_M_Mask_Paper_Binary_Files',
        './data/SiW_M_Makeup_Im_Binary_Files',
        './data/SiW_M_Mask_Mann_Binary_Files',
        './data/SiW_M_Replay_Binary_Files',
        './data/SiW_M_Partial_Cut_Binary_Files',
        './data/SiW_M_Mask_Half_Binary_Files',
        './data/SiW_M_Partial_Funnyeye_Binary_Files',
        './data/SiW_M_Partial_Paperglass_Binary_Files',
        './data/SiW_M_Mask_Trans_Binary_Files',
        './data/SiW_M_Paper_Binary_Files', './data/SiW_M_Live_Binary_Files',
        './data/SiW_M_Live_Test_Binary_Files'
    ]
    config.DATA_DIR_LIVE = ['./data/SiW_M_Live_Binary_Files']
    config.DATA_DIR_TEST = ['./data/SiW_M_Live_Test_Binary_Files']
    config.LOG_DIR = './logs/model'
    config.MODE = 'training'
    # config.MODE = 'testing'
    config.STEPS_PER_EPOCH_VAL = 180
    config.display()

    # Get images and labels.
    # dataset_train = Dataset(config, 'train')
    # Build a Graph
    model = Model(config)

    # Train the model
    model.compile()
    model.train()
# Example 7
def main():
    # create instance of config
    config = Config()
    if config.use_elmo: config.processing_word = None

    #build model
    model = NERModel(config)

    learn = NERLearner(config, model)
    learn.load()

    if len(sys.argv) == 1:
        print("No arguments given. Running full test")
        sys.argv.append("eval")
        sys.argv.append("pred")

    if sys.argv[1] == "eval":
        # create datasets
        test = CoNLLDataset(config.filename_test, config.processing_word,
                             config.processing_tag, config.max_iter)
        learn.evaluate(test)

    if sys.argv[1] == "pred" or sys.argv[2] == "pred":
        try:
            sent = (sys.argv[2] if sys.argv[1] == "pred" else sys.argv[3])
        except IndexError:
            sent = ["Peter", "Johnson", "lives", "in", "Los", "Angeles"]

        print("Predicting sentence: ", sent)
        pred = learn.predict(sent)
        print(pred)
# Example 8
def main(data_prefix=None):
    # create instance of config
    config = Config()

    # build model
    model = NERModel(config)
    model.build()
    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    if data_prefix:
        cwd = os.getcwd()
        config.filename_dev = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_dev))
        config.filename_test = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_test))
        config.filename_train = os.path.join(
            cwd, 'data',
            data_prefix + '_' + os.path.basename(config.filename_train))

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    # train model
    print('training')
    model.train(train, dev)
def main():
    # create instance of config
    config = Config()
    config.dir_model = config.dir_output + "model.finetuning.weights/"

    # build model
    model = NERModel(config)
    model.build("fine_tuning")
    model.restore_session(config.dir_model)

    # create dataset
    if len(sys.argv) == 2:
        if sys.argv[1] == 'test':
            test = CoNLLDataset(config.filename_test, config.processing_word,
                                config.processing_tag, config.max_iter)
        elif sys.argv[1] == 'dev':
            test = CoNLLDataset(config.filename_dev, config.processing_word,
                                config.processing_tag, config.max_iter)
        else:
            raise ValueError("expected 'test' or 'dev', got {!r}".format(sys.argv[1]))
    else:
        assert len(sys.argv) == 1
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
# Example 10
def main():
    # Predictions from the first-stage model #
    config_first = Config(dir_output='./results/train_first/')
    model = NERModel(config_first)
    model.build()
    model.restore_session(config_first.dir_model)
    test = CoNLLDataset(config_first.filename_test,
                        config_first.processing_word,
                        config_first.processing_tag, config_first.max_iter)

    print()
    print('Predicting first stage!')
    model.evaluate(test)
    print()

    test_predictions = model.predict_test(test)
    formatted_predictions = format_predictions(test_predictions, 'test',
                                               config_first)

    # Predictions from the second-stage model #
    tf.reset_default_graph()
    config_second = Config(dir_output='./results/train_second/')
    model = NERModel2(config_second)
    model.build()
    model.restore_session(config_second.dir_model)

    print()
    print('Predicting second stage!')
    model.evaluate(formatted_predictions)
    print()
# Example 11
def main():
    config = Config('./results/train_folds/')
    train_predictions_file = './data/predictions/formatted_train_predictions.npy'

    kf = KFold(n_splits=5)

    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)

    train = np.array([el for el in train])
    predictions = [0 for _ in train]

    for train_ids, evaluate_ids in kf.split(train):
        train_dataset = train[train_ids]
        evaluate_dataset = train[evaluate_ids]
        tf.reset_default_graph()
        config = Config('./results/train_folds/')
        model = NERModel(config)
        model.build()
        model.train(train_dataset, evaluate_dataset)
        for id, tags in zip(evaluate_ids,
                            model.predict_test(evaluate_dataset)):
            predictions[id] = tags
        model.close_session()

    predictions = np.array(predictions)
    formatted_predictions = format_predictions(predictions, 'train', config)
    np.save(train_predictions_file, formatted_predictions)
# Example 12
def compute_confuse_matrix(fname, classes):
    """
    Given a file, compute the confusion matrix of y_true and y_pred.
    """
    print('im in')
    y_true = []
    with codecs.open(fname, 'r', 'utf8') as f:
        for line in f:
            line = line.strip().split('\t')[-1]
            y_true.append(line)

    checkpoint_dir = "output/self_attention/multi_attention_0802/"
    pred_path = "tmp/eval_y_self_attention.txt"
    if os.path.exists(checkpoint_dir + 'config.pkl'):
        config = pickle.load(open(checkpoint_dir + 'config.pkl', 'rb'))
    else:
        config = Config()

    config.mode = 'inference'

    word2id, id2word = read_vocab(config.word_vocab_file)
    tag2id, id2tag = read_vocab(config.tag_vocab_file)

    with tf.Session(config=get_config_proto(
            log_device_placement=False)) as sess:
        model = get_model(config.model, config, sess)
        model.build()
        model.restore_model(checkpoint_dir)
        y_pred = infer_file(model, word2id, id2tag, fname, pred_path)

    cmatrix = confusion_matrix(y_true, y_pred, classes)
    print(cmatrix)
    correct = [x == y for x, y in list(zip(y_true, y_pred))]
    print(correct.count(True) / len(correct))
    return cmatrix
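# Hypothetical call for illustration only; the file path and class names are placeholders:
#     cmatrix = compute_confuse_matrix('data/eval.tsv', classes=['news', 'sports', 'finance'])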
# Example 13
    def apply_changes(self, *args):
        try:
            image_quality = int(args[0])
            if image_quality < 0:
                image_quality = 0
            if image_quality > 100:
                image_quality = 100
            Config.data['quality'] = image_quality
        except (IndexError, TypeError, ValueError):
            PopupMsg().show(Text.data['invalid_quality'])

        if self.isValidIPAddress(args[2]):
            Config.data['server_address'] = args[2]
        else:
            PopupMsg().show(Text.data['invalid_server_address'])

        try:
            port = int(args[3])
            if port < 0:
                port = 0
            if port > 65535:
                port = 65535
            Config.data['server_port'] = port
        except (IndexError, TypeError, ValueError):
            PopupMsg().show(Text.data['invalid_port'])  # assumed key name

        Config.data['server_mode'] = args[1]
        Config.set_config()
# Example 14
    def initialize(self):
        Data.initialize()
        Config.initialize()
        self.Delete = None
        self.Search = None
        self.Plaster = None
        self.Config = None
        self.currentMenu = None
# Example 15
    def __init__(self,
                 optimization_folder,
                 nr_iterations,
                 iteration_chunck_size,
                 nr_init_points,
                 embedder='LASEREmbedderI',
                 log_file_name='logs.json',
                 load_log=False,
                 prev_log='log.json',
                 probe=True):

        # Set static variables
        self.INTERMEDIATE_RESULTS_FOLDER = optimization_folder
        self.FINAL_RESULTS_FOLDER = optimization_folder
        self.NR_ITERATIONS = nr_iterations
        self.ITERATION_CHUNCK_SIZE = iteration_chunck_size
        self.NR_INIT_POINTS = nr_init_points
        self.EMBEDDER = embedder
        self.probe = probe
        self.config = Config()
        self.log_file = os.path.join(optimization_folder, log_file_name)
        self.prev_log = os.path.join(optimization_folder, prev_log)
        self.logger = JSONLogger(path=self.log_file)
        self.load_log = load_log
        # Boundaries between which to explore the input space
        self.param_boundaries = {
            'dropout_before_laser': (0., 0.5),
            'dropout_in_laser': (0., 0.5),
            'transformer_drop': (0., 0.5),
            'dropout': (0., 0.5),
            'hidden_size_lstm': (50, 350),
            'weight_decay': (0., 0.1),
            'learning_rate_warmup_steps': (1., 10.0),
            'num_heads': (0.5, 4.49),
            'filter_size': (3.5, 350)
        }
        # Set points on which to evaluate the model for exploration of the solution space
        self.explore_points = {
            'dropout_before_laser': [0.1],
            'dropout_in_laser': [0.25],
            'transformer_drop': [0.0],
            'dropout': [0.0],
            'hidden_size_lstm': [350],
            'weight_decay': [0.01],
            'learning_rate_warmup_steps': [2.],
            'num_heads': [4.],
            'filter_size': [350.]
        }

        self.bo = None  # initialize variable for further error handling

        assert len(
            np.unique([len(n) for n in self.explore_points.values()])
        ) == 1, 'number of explore points should be the same for all parameters'
        self.NUM_EXPLORE_POINTS = np.unique(
            [len(n) for n in self.explore_points.values()])[0]
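
    # A sketch (not part of the original class) of how self.bo might later be built from the
    # boundaries and explore points above, assuming the bayes_opt package that JSONLogger
    # comes from; `train_and_score` is a hypothetical objective function:
    def build_optimizer(self, train_and_score):
        from bayes_opt import BayesianOptimization
        from bayes_opt.event import Events

        self.bo = BayesianOptimization(f=train_and_score, pbounds=self.param_boundaries)
        self.bo.subscribe(Events.OPTIMIZATION_STEP, self.logger)  # write each step to log_file
        if self.probe:
            # evaluate the hand-picked explore points before the guided search
            for i in range(self.NUM_EXPLORE_POINTS):
                self.bo.probe(params={k: v[i] for k, v in self.explore_points.items()},
                              lazy=True)
        self.bo.maximize(init_points=self.NR_INIT_POINTS, n_iter=self.NR_ITERATIONS)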
# Example 16
    def __init__(self):
        config = Config()
        self.__db_path = config.get_db_path() + r'\RMsystem.db'
        self.__conn = None
        self.cursor = None
        self.message_box = Messagebox()
        if self.__db_path:
            self.__open(self.__db_path)
        else:
            self.message_box.window_execution('Database path is missing!',
                                              MessageBoxType.ERROR)
# Example 17
def main():
    """Procedure to build data

    You MUST RUN this procedure. It iterates over the whole dataset (train,
    dev and test) and extract the vocabularies in terms of words, tags, and
    characters. Having built the vocabularies it writes them in a file. The
    writing of vocabulary in a file assigns an id (the line #) to each word.
    It then extract the relevant GloVe vectors and stores them in a np array
    such that the i-th entry corresponds to the i-th word in the vocabulary.


    Args:
        config: (instance of Config) has attributes like hyper-params...

    """
    # get config and processing of words
    config = Config(load=False)
    if config.task == 'pos':
        print("USING POS")
        config.filename_train = "data/train.pos"  # test
        config.filename_dev = "data/dev.pos"
        config.filename_test = "data/test.pos"
    else:
        print("USING NER")
    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = CoNLLDataset(config.filename_dev, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
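# For reference, minimal sketches of the write_vocab / load_vocab behaviour described in the
# docstring above (the real helpers live in the project's data utilities; these illustrative
# re-implementations only show that a word's id is its line number):
def write_vocab_sketch(vocab, filename):
    """Write one word per line; the word's id is simply its line number."""
    with open(filename, "w") as f:
        f.write("\n".join(vocab))

def load_vocab_sketch(filename):
    """Read the file back into a dict mapping word -> line-number id."""
    with open(filename) as f:
        return {word.strip(): idx for idx, word in enumerate(f)}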
# Example 18
def setup(argv):
    config = Config()
    payment = Payment()
    payment.clean()
    sections = config.get_sections()
    for section in sections:
        section_config = config.get_section_config(section)
        if section.startswith('Vietcombank') \
                or section.startswith('Msb') \
                or section.startswith('Techcombank') \
                or section.startswith('Klikbca'):
            payment.set_name(section)
            payment.set_username(section_config['username'])
            payment.save()
# Example 19
    def __init__(self, payment, session=None, proxy=None):
        self.session = session
        self.proxy = proxy if proxy is not None else {}  # avoid a shared mutable default
        self.payment = payment
        self.config = Config()
        self.log = Log()
        techcombank = self.get_techcombank_config()
        self.email_transport = EmailTransport()
        self.login_url = techcombank['login_url']
        self.username = payment.get_username()
        self.password = payment.get_password()
        self.debug_mode = techcombank['debug_mode']
        self.total_transactions = 0
        self.history = History()
        self.code = GenerateCode()
# Example 20
def main(argv=None): 
  # Configurations
  config = Config(gpu='1',
                  root_dir='./data/test/',
                  root_dir_val=None,
                  mode='testing')
  config.BATCH_SIZE = 1

  # Get images and labels.
  dataset_test = Dataset(config, 'test')

  # Train
  _M, _s, _b, _C, _T, _imname = _step(config, dataset_test, False)

  # Add ops to save and restore all the variables.
  saver = tf.train.Saver(max_to_keep=50,)
  with tf.Session(config=config.GPU_CONFIG) as sess:
    # Restore the model
    ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
      last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
      print('**********************************************************')
      print('Restore from Epoch '+str(last_epoch))
      print('**********************************************************')
    else:
      init = tf.initializers.global_variables()
      last_epoch = 0
      sess.run(init)
      print('**********************************************************')
      print('Train from scratch.')
      print('**********************************************************')

    step_per_epoch = int(len(dataset_test.name_list) / config.BATCH_SIZE)
    with open(config.LOG_DIR + '/test/score.txt', 'w') as f:
      for step in range(step_per_epoch):
        M, s, b, C, T, imname = sess.run([_M, _s, _b, _C, _T, _imname])
        # save the score
        for i in range(config.BATCH_SIZE):
            _name = imname[i].decode('UTF-8')
            _line = _name + ',' + str("{0:.3f}".format(M[i])) + ','\
                                + str("{0:.3f}".format(s[i])) + ','\
                                + str("{0:.3f}".format(b[i])) + ','\
                                + str("{0:.3f}".format(C[i])) + ','\
                                + str("{0:.3f}".format(T[i]))
            f.write(_line + '\n')  
            print(str(step+1)+'/'+str(step_per_epoch)+':'+_line, end='\r')  
    print("\n")
# Example 21
def main():
    # create instance of config
    config = Config()
    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                       config.processing_tag, config.processing_pos,
                       config.processing_chunk, config.max_iter)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.processing_pos,
                         config.processing_chunk, config.max_iter)
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.processing_pos,
                        config.processing_chunk, config.max_iter)
    max_sequence_length = max(max([len(seq[0]) for seq in train]),
                              max([len(seq[0]) for seq in dev]),
                              max([len(seq[0]) for seq in test]))

    max_word_length = max(
        max([len(word[0]) for seq in train for word in seq[0]]),
        max([len(word[0]) for seq in test for word in seq[0]]),
        max([len(word[0]) for seq in dev for word in seq[0]]))
    print(max_word_length, max_sequence_length)
    model = NERModel(config, max_word_length, max_sequence_length)
    model.build()
    model.train(train, dev)
    model.restore_session(config.dir_model)
    model.evaluate(test)
# Example 22
def get_model_api():
    """Returns lambda function for api"""

    # 1. initialize model once and for all
    config = Config()
    model = NERModel(config)
    model.build()
    model.restore_session("results/crf/model.weights/")

    def model_api(input_data):
        """
        Args:
            input_data: submitted to the API, raw string

        Returns:
            output_data: after some transformation, to be
                returned to the API

        """
        # 2. process input
        punc = [",", "?", ".", ":", ";", "!", "(", ")", "[", "]"]
        s = "".join(c for c in input_data if c not in punc)
        words_raw = s.strip().split(" ")

        # 3. call model predict function
        preds = model.predict(words_raw)

        # 4. process the output
        output_data = align_data({"input": words_raw, "output": preds})

        # 5. return the output for the api
        return output_data

    return model_api
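# Example use of the returned closure; the input string is arbitrary illustration only:
#     model_api = get_model_api()
#     result = model_api("John Smith lives in New York")
#     # result is the aligned {"input": ..., "output": ...} mapping produced by align_data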
# Example 23
def main():
    # create instance of config
    config = Config()
    if config.use_elmo: config.processing_word = None

    #build model
    model = NERModel(config)

    # create datasets
    dev = CoNLLDataset(config.filename_dev, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)
    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter, config.use_crf)

    learn = NERLearner(config, model)
    learn.fit(train, dev)
# Example 24
    def __init__(self):
        self.config = Config()
        self.config.dim_word = 250
        self.config.dim_char = 50
        self.model = NERModel(self.config)
        self.model.build()
        self.model.restore_session(self.MODEL_DIR)
# Example 25
    @staticmethod
    def write(md5, data):
        save_path = Config.get("data_save_path")
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        with open(os.path.join(save_path, md5), 'wb') as file:
            file.write(data)
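
    # A usage sketch, assuming `md5` is the hex digest of the payload and becomes the file
    # name; the owning class name below is hypothetical:
    #     import hashlib
    #     payload = b"raw bytes to persist"
    #     FileStore.write(hashlib.md5(payload).hexdigest(), payload)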
# Example 26
def main():
    # create instance of Config; it also loads the data:
    # the vocabulary, the pretrained GloVe embeddings matrix, and the str->id mapping functions
    config = Config()

    # build model
    model = NERModel(config)
    model.build("train")

    # model.restore_session("results/crf/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets [(char_ids), word_id]
    # processing_word = get_processing_word(lowercase=True)
    dev = CoNLLDataset(config.filename_dev)
    train = CoNLLDataset(config.filename_train)
    test = CoNLLDataset(config.filename_test)

    train4cl = CoNLLdata4classifier(train,
                                    processing_word=config.processing_word,
                                    processing_tag=config.processing_tag)
    dev4cl = CoNLLdata4classifier(dev,
                                  processing_word=config.processing_word,
                                  processing_tag=config.processing_tag)
    test4cl = CoNLLdata4classifier(test,
                                   processing_word=config.processing_word,
                                   processing_tag=config.processing_tag)

    # train model
    model.train(train4cl, dev4cl, test4cl)
# Example 27
def main():

    # get config and processing of words
    config = Config(load=False)
    processing_word = get_processing_word(lowercase=True)

    # Generators
    dev = CoNLLDataset(config.filename_dev, processing_word)
    test = CoNLLDataset(config.filename_test, processing_word)
    train = CoNLLDataset(config.filename_train, processing_word)

    # Build Word and Tag vocab
    vocab_words, vocab_tags = get_vocabs([train, dev, test])
    vocab_glove = get_glove_vocab(config.filename_glove)

    vocab = vocab_words & vocab_glove
    vocab.add(UNK)
    vocab.add(NUM)

    # Save vocab
    write_vocab(vocab, config.filename_words)
    write_vocab(vocab_tags, config.filename_tags)

    # Trim GloVe Vectors
    vocab = load_vocab(config.filename_words)
    export_trimmed_glove_vectors(vocab, config.filename_glove,
                                 config.filename_trimmed, config.dim_word)

    # Build and save char vocab
    train = CoNLLDataset(config.filename_train)
    vocab_chars = get_char_vocab(train)
    write_vocab(vocab_chars, config.filename_chars)
# Example 28
def main():
    # create instance of config
    config_file = sys.argv[1]

    config = Config(config_file)

    print("dir model : ", config.dir_model)

    # build model
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    # model.reinitialize_weights("words")
    # model.reinitialize_weights("chars")
    # model.reinitialize_weights("train_step")

    # Evaluate on another data set
    if len(sys.argv) > 2:
        test_file_name = sys.argv[2]
        test = CoNLLDataset(test_file_name, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", test_file_name, "..")


    # create dataset
    else:
        test = CoNLLDataset(config.filename_test, config.processing_word,
                            config.processing_tag, config.max_iter)
        print("Testing on ", config.filename_test, "..")

    # evaluate and interact
    # model.predict_test(test, output=sys.stdout)
    model.evaluate(test)
# Example 29
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    if config.restore:
        model.restore_session(
            "results/test/model.weights/")  # optional, restore weights

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.restore_session(config.dir_model)
    metrics = model.evaluate(test)

    with open(os.path.join(config.dir_output, 'test_results.txt'),
              'a') as file:
        file.write('{}\n'.format(metrics['classification-report']))
        file.write('{}\n'.format(metrics['confusion-matrix']))
        file.write('{}\n\n'.format(metrics['weighted-f1']))
def train():
    # 1. init config and model
    config = Config()
    threshold = (config.sequence_length / 2) + 1
    model = BertCNNModel(config)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    save_path = config.ckpt_dir + "model.ckpt"
    # if not os.path.exists(config.ckpt_dir):
    #     os.makedirs(config.ckpt_dir)
    batch_size = 8
    with tf.Session(config=gpu_config) as sess:
        sess.run(tf.global_variables_initializer())
        if os.path.exists(config.ckpt_dir):
            # latest_checkpoint expects the checkpoint directory, not the file prefix
            saver.restore(sess, tf.train.latest_checkpoint(config.ckpt_dir))
        for i in range(10000):
            # 2. feed data
            input_x = np.random.randn(config.batch_size, config.sequence_length)  # [None, self.sequence_length]
            input_x[input_x >= 0] = 1
            input_x[input_x < 0] = 0
            input_y = generate_label(input_x, threshold)
            p_mask_lm = list(range(batch_size))
            # 3. run session to train the model, print some logs.
            loss, _ = sess.run(
                [model.loss_val, model.train_op],
                feed_dict={model.x_mask_lm: input_x,
                           model.y_mask_lm: input_y,
                           model.p_mask_lm: p_mask_lm,
                           model.dropout_keep_prob: config.dropout_keep_prob})
            print(i, "loss:", loss, "-------------------------------------------------------")
            if i == 300:
                print("label[0]:", input_y[0])
                print("input_x:", input_x)
            if i % 500 == 0:
                saver.save(sess, save_path, global_step=i)
# Example 31
def send_custom_msg(msg, reply):
    custom_text = copy.deepcopy(wxconfig.custom_text_template)
    custom_text['touser'] = msg.fromusername
    custom_text['text']['content'] = reply
    logging.debug(custom_text)

    config = yield dbutil.do(Config.select().get)
    url = wxconfig.custom_msg_url.format(config.accesstoken)
    logging.debug(url)

    logging.debug(json.dumps(custom_text, ensure_ascii=False, indent=4))
    http_client = AsyncHTTPClient()
    response = yield http_client.fetch(url, **{'method': 'POST',
                                               'body': json.dumps(custom_text, ensure_ascii=False)})
    logging.debug(response.body.decode())
# Example 32
def refresh_access_token():
    logging.info('Refreshing the WeChat access token')
    try:
        config = yield dbutil.do(Config.select().get)
        http_client = AsyncHTTPClient()
        logging.debug(wxconfig.access_token_url.format(config.appid, config.appsecret))
        response = yield http_client.fetch(
            wxconfig.access_token_url.format(config.appid, config.appsecret))
        logging.info(response.body.decode())
        result = json.loads(response.body.decode())
        config.accesstoken = result['access_token']
        config.jsapiticket = yield refresh_jsapi_ticket(config.accesstoken)
        config.expires = result['expires_in']
        logging.debug(config.accesstoken)
        logging.debug(config.jsapiticket)
        logging.debug(config.expires)
        yield dbutil.do(config.save)
    except Exception:
        logging.exception('Failed to refresh the WeChat access token')
# Example 33
    def encAES(self, m):
        if self.dek is None:
            raise Exception('DEK is null, cannot encrypt')

        a = AuthenticatedCryptoAbstraction(bytes(self.dek, "utf-8"))
        CT_AES = a.encrypt(m)
        groupObj = PairingGroup('SS512')

        return objectToBytes(CT_AES, groupObj)

    def loop_end(self):
        self._client.loop_stop()
        self._client.disconnect()

if __name__ == '__main__':
    import sys, os

    sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
    from model.config import Config

    groupCfg = Config('../conf/gc_config.ini')
    cfg = groupCfg.getConfig("mqtt_client")
    cfg['topic'] = "test/topic"
    pub = Publisher(cfg)
    pub.loop()
    pub.publish("dude")
    pub.loop_end()

# Example 34
    def decAES(self, CT):  # signature assumed from the encAES counterpart; the original def line is missing
        a = AuthenticatedCryptoAbstraction(bytes(self.dek, "utf-8"))
        # CT_AES = a.encrypt(message)
        return a.decrypt(CT)

    def on_log(self, client, userdata, level, buf):
        print("log: " + str(level) + ": " + str(buf));

if __name__ == '__main__':
    import sys, os
    
    sys.path.append(os.path.join(os.path.dirname(__file__), '../'))    
    from model.config import Config
    
    groupCfg = Config('../conf/gc_config.ini')
    
    sub = Subscriber(groupCfg.getConfig("mqtt_client"))
    sub.loop()

#    client = mqtt.Client()
#    client.on_connect = on_connect
#    client.on_message = on_message
#    client.on_log = on_log
    
    #ssh -N al@me -L 1883/localhost/1883
#    client.connect(cfg['host'], int(cfg['port']), int(cfg['keepalive']))
    #client.connect("localhost", 1883, 360)
    
    # Blocking call that processes network traffic, dispatches callbacks and
    # handles reconnecting.