Exemplos de generate_data em Python, exemplos de utils.utils.generate_data em Python

Exemplo n.º 1

0

Exibir arquivo

def train_crf():
	"""Fit a CRF tagger on the training split and print dev-split scores.

	The token and tag vocabularies are loaded with ``load_data`` and both
	splits are converted with ``generate_data``; only the last two values of
	each returned 5-tuple are used here. The path ``logdir/model_crf`` is
	passed as ``model_file`` to both ``fit`` and ``predict``.
	"""
	token_to_id, _ = load_data(TOKEN_DATA)
	label_to_id, _ = load_data(TAG_DATA)

	# Only the 4th and 5th values of each split are consumed by the CRF.
	_, _, _, train_x, train_y = generate_data(
		TRAIN_DATA, token_to_id, label_to_id, max_len=hp.max_len)
	_, _, _, dev_x, dev_y = generate_data(
		DEV_DATA, token_to_id, label_to_id, max_len=hp.max_len)

	crf_path = "logdir/model_crf"
	tagger = CRF()
	tagger.fit(
		train_x,
		train_y,
		template_file='model/module/templates.txt',
		model_file=crf_path,
		max_iter=20,
	)

	predicted = tagger.predict(dev_x, model_file=crf_path)
	accuracy, precision, recall, f_score = get_ner_fmeasure(dev_y, predicted)
	report = 'acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'
	print(report.format(accuracy, precision, recall, f_score))

Exemplo n.º 2

0

Exibir arquivo

def train_hmm():
	"""Fit an HMM tagger on the training split and print dev-split scores.

	The token and tag vocabularies are loaded with ``load_data`` and both
	splits are converted with ``generate_data``; only the last two values of
	each returned 5-tuple are used here. The path ``logdir/model_hmm`` is
	passed as ``model_file`` to both ``fit`` and ``predict``.
	"""
	token_to_id, _ = load_data(TOKEN_DATA)
	label_to_id, _ = load_data(TAG_DATA)

	# Only the 4th and 5th values of each split are consumed by the HMM.
	_, _, _, train_x, train_y = generate_data(
		TRAIN_DATA, token_to_id, label_to_id, max_len=hp.max_len)
	_, _, _, dev_x, dev_y = generate_data(
		DEV_DATA, token_to_id, label_to_id, max_len=hp.max_len)

	hmm_path = "logdir/model_hmm"
	tagger = HMM()
	tagger.fit(train_x, train_y, model_file=hmm_path)

	predicted = tagger.predict(dev_x, model_file=hmm_path)
	accuracy, precision, recall, f_score = get_ner_fmeasure(dev_y, predicted)
	report = 'acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'
	print(report.format(accuracy, precision, recall, f_score))

Exemplo n.º 3

0

Exibir arquivo

def train(network='rnn'):
	"""Train a neural CRF tagger and print per-epoch loss and dev metrics.

	``network`` selects the model class: ``"transformer"``, ``'rnn'``,
	``'cnn'`` or ``'match-pyramid'`` (the last two both build ``CnnCRF``).
	Any other value makes the function return ``None`` after the data has
	been loaded, without training.
	"""
	token_to_id, _ = load_data(TOKEN_DATA)
	label_to_id, id_to_label = load_data(TAG_DATA)
	train_x, train_y, train_lens, _, _ = generate_data(
		TRAIN_DATA, token_to_id, label_to_id, max_len=hp.max_len)
	dev_x, dev_y, dev_lens, _, gold_tags = generate_data(
		DEV_DATA, token_to_id, label_to_id, max_len=hp.max_len)

	n_words = len(token_to_id)
	n_labels = len(label_to_id)
	if network == "transformer":
		model = TransformerCRFModel(n_words, n_labels, is_training=True)
	elif network == 'rnn':
		model = BiRnnCRF(n_words, n_labels)
	elif network in ('cnn', 'match-pyramid'):
		# Both names map to the same convolutional model.
		model = CnnCRF(n_words, n_labels)
	else:
		return

	def as_feed(xs, ys, lens):
		# Bind one mini-batch to the model's input placeholders.
		return {model.x: xs, model.y: ys, model.seq_lens: lens}

	sv = tf.train.Supervisor(graph=model.graph, logdir=logdir, save_model_secs=0)
	with sv.managed_session() as sess:
		epoch = 0
		while epoch < hp.num_epochs:
			epoch += 1
			if sv.should_stop():
				break

			# One optimisation pass over the training split.
			train_losses = []
			for xs, ys, lens in batch_data(train_x, train_y, train_lens, hp.batch_size):
				step_loss, _ = sess.run([model.loss, model.train_op], feed_dict=as_feed(xs, ys, lens))
				train_losses.append(step_loss)

			# Evaluation pass over the dev split; no train_op is run here.
			dev_losses = []
			predicted_tags = []
			for xs, ys, lens in batch_data(dev_x, dev_y, dev_lens, hp.batch_size):
				step_loss, logits = sess.run([model.loss, model.logits], as_feed(xs, ys, lens))
				dev_losses.append(step_loss)

				transition = model.transition.eval(session=sess)
				decoded = model.predict(logits, transition, lens)
				predicted_tags.extend(recover_label(decoded, lens, id_to_label))

			mean_train = np.round(float(np.mean(train_losses)), 4)
			mean_dev = np.round(float(np.mean(dev_losses)), 4)
			print('****************************************************')
			acc, p, r, f = get_ner_fmeasure(gold_tags, predicted_tags)
			print('epoch:\t{}\ttrain loss:\t{}\tdev loss:\t{}'.format(epoch, mean_train, mean_dev))
			print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}'.format(acc, p, r, f))
			print('****************************************************\n\n')

Exemplo n.º 4

0

Exibir arquivo

                               num_labels=43)
    else:
        raise (RuntimeError("unknown model: " + args.model))

    print("Evaluating", modelfile)
    sys.stdout.flush()

    random.seed(1215)
    np.random.seed(1215)
    """
    Generate data
    """
    inputs, targets, true_labels, true_ids, img_info = generate_data(
        data,
        samples=args.numimage,
        targeted=targeted,
        random_and_least_likely=True,
        target_type=target_type,
        predictor=model.model.predict,
        start=args.startimage)
    # get the logit layer predictions
    preds = model.model.predict(inputs)

    Nsamp = 0
    r_sum = 0.0
    r_gx_sum = 0.0
    """
    Start computing robustness bound
    """
    print("starting robustness verification on {} images!".format(len(inputs)))
    sys.stdout.flush()
    sys.stderr.flush()

Exemplo n.º 5

0

Exibir arquivo

    def __init__(self, config, ctx, logger, test=False):
        """Set up the environment state, adjacency matrices and data loaders.

        Args:
            config: Mapping indexed by string keys; the keys read here are
                'epochs', 'phi', 'num_of_vertices', 'adj_filename',
                'id_filename', 'graph_signal_matrix_filename',
                'pearsonr_adj_filename', 'time_max' and 'n'.
            ctx: Stored unchanged on ``self.ctx``.
            logger: Object providing ``append_log_file(str)``; used to record
                the shape of every data split.
            test: Flag stored unchanged on ``self.test``.
        """
        self.ctx = ctx
        self.config = config
        self.test = test
        self.logger = logger
        # parse config
        self.epochs = config['epochs']
        self.phi = config['phi']
        self.num_of_vertices = config['num_of_vertices']
        self.adj_filename = config['adj_filename']
        self.id_filename = config['id_filename']
        self.time_series_filename = config['graph_signal_matrix_filename']
        self.pearsonr_adj_filename = config['pearsonr_adj_filename']
        self.time_max = config['time_max']
        self.n = config['n']
        # load data
        # The dataset name is the adjacency file's base name without ".csv".
        self.dataset_name = os.path.split(self.adj_filename)[1].replace(
            ".csv", "")
        # Keep only index 0 along the third axis of the stored 'data' array.
        time_series_matrix = np.load(self.time_series_filename)['data'][:, :,
                                                                        0]
        # Two adjacency matrices are kept side by side: one produced by SIPM1
        # from the time series, one read from the adjacency file.
        adj_SIPM1 = SIPM1(filepath=self.pearsonr_adj_filename,
                          time_series_matrix=time_series_matrix,
                          num_of_vertices=self.num_of_vertices,
                          phi=self.phi)
        adj_SIPM4 = get_adjacency_matrix(self.adj_filename,
                                         self.num_of_vertices,
                                         id_filename=self.id_filename)
        self.adj_SIPM = (adj_SIPM1, adj_SIPM4)

        # action_space = discrete(0,n+2) which will be mapped into discrete(-1,0,...,n,n+1(train_state)) as the def in the paper
        # NOTE(review): the code below builds a 5-component MultiDiscrete space
        # rather than a single discrete(0, n+2) one; confirm the comment above
        # is still accurate.
        self.action_space = spaces.MultiDiscrete([4, 3, 4, self.n - 1, 1])
        self.observation_space = spaces.Box(low=np.array([-2, -1, -1, -1, -1]),
                                            high=np.array(
                                                [self.n, 4, 3, 4, self.n - 1]))

        # does not contain the training-stage action
        self.action_trajectory = []
        self.actions = []
        self.state_trajectory = []
        self.current_state_phase = -1
        self.training_stage = False
        self.training_stage_action = None

        # self.data maps each candidate batch size to its list of loaders.
        self.data = {}
        self.batch_size_option = [32, 50, 64]
        self.transformer = {}
        self.train_set_sample_num = 0
        self.eval_set_sample_num = 0
        self.test_set_sample_num = 0
        for batch_size in self.batch_size_option:
            loaders = []
            true_values = []
            # presumably generate_data yields the train, eval and test splits
            # in that order (idx 0, 1, 2) -- verify against generate_data.
            for idx, (x, y) in enumerate(
                    generate_data(self.time_series_filename)):
                if idx == 0:
                    self.train_set_sample_num = x.shape[0]
                elif idx == 1:
                    self.eval_set_sample_num = x.shape[0]
                else:
                    self.test_set_sample_num = x.shape[0]
                # Drop the trailing size-1 axis of the targets.
                y = y.squeeze(axis=-1)
                print(x.shape, y.shape)
                self.logger.append_log_file(str((x.shape, y.shape)))
                # Only the first split (idx 0) is shuffled.
                loaders.append(
                    mx.io.NDArrayIter(x,
                                      y,
                                      batch_size=batch_size,
                                      shuffle=(idx == 0),
                                      label_name='label'))
                if idx == 0:
                    self.training_samples = x.shape[0]
                else:
                    # Targets of the non-first splits are collected separately.
                    true_values.append(y)
                # Re-assigned on every iteration; always the same list object.
                self.data[batch_size] = loaders