Example #1
    def __init__(self,
                 source_path,
                 target_path,
                 pos_num=1,
                 neg_num=1,
                 test_split=0.1):
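        # Read source/target splits, normalize the target features, and cache the
        # positive/negative sample indices for both domains.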
        self.pos_num = pos_num
        self.neg_num = neg_num

        self.test_split = test_split
        self.labels = [0, 1]

        train_data, train_labels, self.target_train_data, self.target_train_labels, \
        self.target_test_data, self.target_test_labels, self.columns = read_data(source_path, target_path,
                                                                                  self.test_split, re_sample=True)

        self.data_source = train_data, train_labels
        self.data_target = data_normalization(
            self.target_train_data), self.target_train_labels.astype(np.int32)

        self.test_data = data_normalization(self.target_test_data)
        self.test_labels = self.target_test_labels.astype(np.int32)

        self.source_pos_neg_ids = self.get_pos_neg_ids(self.data_source)
        self.target_pos_neg_ids = self.get_pos_neg_ids(self.data_target)
Example #2
def ensemble(pos_file, neg_file, i, dir, soft_target_path, sequence_length=30):
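    # Average per-sample (pos, neg) probabilities from i checkpoint output files under dir,
    # report ensemble accuracy/recall/precision against the labelled data, and write
    # soft targets (a weighted mix of one-hot labels and averaged probabilities) to soft_target_path.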
    probabilities = list()
    num_files = int(i)
    for i in range(num_files):
        path = dir + str(i + 1)
        with codecs.open(path, 'r', encoding="utf-8") as f:
            prob = f.readline()
            prob = prob.strip().strip('#')
            prob = prob.split('#')
            b = []  # b holds all results produced by one file (one checkpoint)
            for pro in prob:
                pos, neg = pro.split(',')
                pos = float(pos)
                neg = float(neg)
                a = [pos, neg]  # a holds the result for one sample
                b.append(a)
        probabilities.append(b)

    x, tags, deps, heads, y = data_loader.read_data(pos_file, neg_file,
                                                    sequence_length)
    label = np.argmax(y, axis=1)
    neg_y = np.sum(label == 1)

    probabilities = np.array(probabilities)
    print(probabilities.shape)
    probability = np.mean(probabilities, axis=0)
    assert len(probability) == len(probabilities[0])
    pre = np.argmax(probability, axis=1)
    accuracy = np.sum(pre == label) / len(label)
    count = 0
    for i in range(len(pre)):
        if pre[i] == 1 and pre[i] == label[i]:
            count += 1
    recall = count / neg_y
    precision = count / np.sum(pre == 1)

    print("*" * 20 + "\nEnsemble Model:\n")
    print("Accuracy:", accuracy, "Recall:", recall, "Precision:", precision)
    print("\n" + "*" * 20)

    print("-" * 20 + "\nWriting soft-target!\n" + "-" * 20)
    assert len(probability) == len(y) and len(pre) == len(label)
    print("Data Numbers:", len(probability))
    with codecs.open(soft_target_path, "w", encoding="utf-8") as ff:
        for i in range(len(pre)):
            if pre[i] == label[i]:
                soft_target = 0.1 * y[i] + 0.9 * probability[i]
            else:
                soft_target = 0.4 * y[i] + 0.6 * probability[i]

            pos, neg = soft_target
            pos, neg = str(pos), str(neg)
            ff.write(pos + "," + neg + "\n")
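
A minimal sketch (hypothetical probabilities and file name) of the checkpoint output format that ensemble() above parses: one line per file, files named dir + "1", dir + "2", ..., with samples separated by '#' and each sample written as "pos,neg".

import codecs

# Hypothetical per-sample (pos, neg) probabilities from one checkpoint run.
sample_probs = [(0.9, 0.1), (0.2, 0.8), (0.6, 0.4)]

# A single line, samples joined by '#', each as "pos,neg" -- the layout ensemble() splits apart.
with codecs.open("probability_file_1", "w", encoding="utf-8") as f:
    f.write("#".join("{},{}".format(p, n) for p, n in sample_probs) + "\n")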
Example #3
def read_data():
    '''
    Load the configured dataset, then cache the train/val/test splits and the
    vocabulary as pickles (skipped when the pickle files already exist).
    '''
    file_name = './checkpoint/data/{}/{}_@dpart@.pickle'.format(
        params['data_set_name'], params['data_set_name'])
    if params['back_translation_file'] is True:
        file_name = './checkpoint/{}/{}_@dpart@_bt.pickle'.format(
            params['data_set_name'], params['data_set_name'])
    if os.path.exists(file_name.replace(
            '@dpart@', 'vocab')) and os.path.exists(
                file_name.replace('@dpart@', 'test')) and os.path.exists(
                    file_name.replace('@dpart@', 'train')) and os.path.exists(
                        file_name.replace('@dpart@', 'val')):
        print('Data exists, will not parse')
        return

    if params['data_set_name'] == 'cnn':
        print('Loading CNN data')
        train_data, val_data, test_data, word2id_dictionary, id2word_dictionary = dL.read_cnn_dm_data(
            params['DATA_Path'],
            limit_vocab=params['vocab_size'],
            use_back_translation=params['use_back_translation'],
            back_translation_file=params['back_translation_file'])

    elif params['data_set_name'] == 'github':
        train_data, val_data, test_data, word2id_dictionary, id2word_dictionary = dL.read_github_data(
            params['DATA_Path'],
            use_back_translation=params['use_back_translation'],
            back_translation_file=params['back_translation_file'])

    else:
        print('Loading {} data'.format(params['data_set_name']))
        train_data, val_data, test_data, word2id_dictionary, id2word_dictionary = dL.read_data(
            params['DATA_Path'],
            use_back_translation=params['use_back_translation'],
            back_translation_file=params['back_translation_file'])

    print('Saving data...')
    with open(file_name.replace('@dpart@', 'train'), "wb") as output_file:
        pickle.dump(train_data, output_file)
    with open(file_name.replace('@dpart@', 'val'), "wb") as output_file:
        pickle.dump(val_data, output_file)
    with open(file_name.replace('@dpart@', 'test'), "wb") as output_file:
        pickle.dump(test_data, output_file)
    with open(file_name.replace('@dpart@', 'vocab'), "wb") as output_file:
        pickle.dump([word2id_dictionary, id2word_dictionary], output_file)

    del train_data, val_data, test_data, word2id_dictionary, id2word_dictionary
Example #4
attribute_file = '/home/youngwook/Documents/celeb/list_attr_celeba.txt'
folder_names = image_folder.get_folders(image_dir)

#options
from options import options
options = options()
opts = options.parse()

#Download and load the training data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5)),
])

data = data_loader.read_data(opts, folder_names[0])

train_x, test_x = data.train_test_split()

train_data = data_loader.CelebA_DataLoader(train_x, folder_names[0], transform=transform, attribute_file=attribute_file, size=opts.resize, randomcrop=opts.image_shape)
trainloader = DataLoader(train_data, batch_size=opts.batch, shuffle=True, num_workers=4)

test_data = data_loader.CelebA_DataLoader(test_x, folder_names[0], transform=transform, attribute_file=attribute_file, size=opts.resize, randomcrop=opts.image_shape)
testloader = DataLoader(test_data, batch_size=256, shuffle=True, num_workers=8)

from network import resnet50
from train import GAN_Trainer
from test import Gan_Tester

'''Discriminator'''
D = resnet50().to(device)
Example #5
    for x in range(items.shape[0]):
        child = build_tree(dictionary[items[x]], metadata)
        node.children.append((items[x], child))

    return node


def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s


def print_tree(node, level):
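    # Recursively pretty-print the tree: leaves print their answer, inner nodes print
    # their attribute followed by each branch value and its subtree, indented per level.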
    if node.answer != "":
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)

    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)


filename = input()
metadata, traindata = read_data(filename)
data = np.asarray(traindata)
node = build_tree(data, metadata)
print_tree(node, 0)
Example #6
def main(_):
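    # Load character-level pos/neg data, hold out a dev split, train a CharCNN with
    # attention, restore from an existing checkpoint when available, and save
    # checkpoints every FLAGS.checkpoint_every steps.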
    print("Loading data...")
    x, y, sequence_length = data_loader.read_data(FLAGS.pos_data,
                                                  FLAGS.neg_data,
                                                  FLAGS.max_word_length,
                                                  FLAGS.max_seq_length)
    print("Data Size:", len(y))
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    seq_shuffled = sequence_length[shuffle_indices]
    dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    seq_train, seq_dev = seq_shuffled[:dev_sample_index], seq_shuffled[
        dev_sample_index:]
    del x, y, sequence_length, x_shuffled, y_shuffled, seq_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        #session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = CharCNN(char_vocab_size=FLAGS.char_vocab_size,
                          char_embed_size=FLAGS.char_embed_size,
                          batch_size=FLAGS.batch_size,
                          max_word_length=FLAGS.max_word_length,
                          max_seq_length=FLAGS.max_seq_length,
                          filters=eval(FLAGS.filters),
                          filter_sizes=eval(FLAGS.filter_sizes),
                          num_classes=FLAGS.num_classes,
                          rnn_size=FLAGS.rnn_size,
                          attention_size=FLAGS.attention_size)

            save_path = os.path.join(FLAGS.save_path)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            saver = tf.train.Saver(tf.trainable_variables())
            for v in tf.trainable_variables():
                print("Save:", v.name)

            sess.run(tf.global_variables_initializer())

            check_point_dir = os.path.join(FLAGS.save_path)
            ckpt = tf.train.get_checkpoint_state(check_point_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")

            batches = data_loader.batch_iter(
                list(zip(x_train, y_train, seq_train)), FLAGS.batch_size,
                FLAGS.num_epochs)

            global_max_acc = 0
            for batch in batches:
                x_batch, y_batch, seq_batch = zip(*batch)
                train_step(x_batch, y_batch, seq_batch, sess, cnn)
                current_step = tf.train.global_step(sess, cnn.global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    max_dev_acc = 0
                    print("\nEvaluation:")
                    batches_dev = data_loader.batch_iter(
                        list(zip(x_dev, y_dev, seq_dev)), FLAGS.batch_size, 1)
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev, seq_batch_dev = zip(
                            *batch_dev)
                        max_dev_acc = dev_step(x_batch_dev, y_batch_dev,
                                               seq_batch_dev, sess, cnn,
                                               max_dev_acc)
                    print("During this evaluation phase, the max accuracy is:",
                          max_dev_acc)
                    if max_dev_acc > global_max_acc:
                        global_max_acc = max_dev_acc
                    print("\nUntil now, the max accuracy is:", global_max_acc)
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      os.path.join(save_path, "model"),
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #7
    return node


def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s


def print_tree(node, level):

    if node.answer != "":
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)

    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)


metadata, traindata = read_data("train_dt.csv")

data = np.array(traindata)
print('unique labels:', np.unique(data[:, -1]))
node = create_node(data, metadata)

print_tree(node, 0)
Example #8
File: main.py Project: yk287/Audio
#options
from options import options
from data_loader import Audio_Dataloader, read_data
import torch
from torchvision import datasets, transforms
from network import discriminator
import torch.optim as optim
from train import trainer
import torch.nn as nn

options = options()
opts = options.parse()
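
# Read the audio file list, split it into train/test, and wrap each split in a DataLoader.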

data_reader = read_data(opts)
train_filename, test_filename = data_reader.train_test_split()

train_data = Audio_Dataloader(train_filename, opts,
                              data_reader.name_to_label_dict)
test_data = Audio_Dataloader(test_filename, opts,
                             data_reader.name_to_label_dict)

trainloader = torch.utils.data.DataLoader(train_data,
                                          batch_size=opts.batch,
                                          shuffle=True,
                                          num_workers=opts.cpu_count)
testloader = torch.utils.data.DataLoader(test_data,
                                         batch_size=opts.batch,
                                         shuffle=True,
                                         num_workers=opts.cpu_count)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Example #9
from data_loader import shuffle_split, gen_list, read_data, read_mask_onehot_encoding

path = '/Lab1/Lab3/MRI/'
img_h, img_w = 240, 240
Img = gen_list(path, 'Image')
Mask = gen_list(path, 'Mask')
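
# Shuffle-split the image/mask lists into training and validation sets, one-hot encode
# the masks, and load the images before building and compiling the U-Net.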

Mask_train, Mask_validation, Img_train, Img_validation = shuffle_split(
    Mask, Img, 80)  # Image and mask distribution

Mask_train = read_mask_onehot_encoding(path + 'Mask/', Mask_train, img_h,
                                       img_w)
Mask_validation = read_mask_onehot_encoding(path + 'Mask/', Mask_validation,
                                            img_h, img_w)

Img_train = read_data(path + 'Image/', Img_train, img_h, img_w)
Img_validation = read_data(path + 'Image/', Img_validation, img_h, img_w)

model = get_unet(input_img=(240, 240, 1),
                 n_filters=16,
                 kernel_size=3,
                 dropout=0.5,
                 batchnorm=True)

model.compile(optimizer=Adam(lr=0.0001),
              loss=[dice_coef_loss],
              metrics=[dice_coef, precision, recall])
History = model.fit(Img_train,
                    Mask_train,
                    batch_size=4,
                    epochs=100,
Example #10
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node


def empty(size):
    S = ""
    for x in range(size):
        S += " "
    return S


def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)


metadata, traindata = read_data("tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
Example #11
File: main.py Project: yk287/ML
attribute_file = '/home/youngwook/Documents/ClothingAttributeDataset/labels/category_GT.mat'
folder_names = image_folder.get_folders(image_dir)

#options
from options import options
options = options()
opts = options.parse()

#Download and load the training data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5)),
])

train_image, train_tag, test_image, test_tag = data_loader.read_data(opts, folder_names[0], attribute_file=attribute_file).train_test_split()


train_data = data_loader.CelebA_DataLoader(opts, train_image, train_tag, transform=transform, size=opts.resize, randomcrop=opts.image_shape)
trainloader = DataLoader(train_data, batch_size=opts.batch, shuffle=True, num_workers=4)

from network import discriminator
import trainer
from tester import tester

output_size = opts.num_classes
model = discriminator(opts).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))
Example #12
def interact():
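    # Interactively run k-means on 'in.data' (validating the chosen k against the data size),
    # then classify one user-entered point with k-NN, writing both results to 'out.data'.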

    skip_class = False

    data = data_loader.read_data('in.data', skip_class=skip_class,
                                 skip_header=False)
    data = np.array(data)

    if not skip_class:
        split = np.split(data, [-1], axis=1)
        data = split[0]
        org_classes = split[1]

    data = data.astype(np.float)

    k_m = 0
    k_n = 10000

    while k_m < 1 or k_m > data.shape[0]:

        k_m = raw_input("Input number of k-means: ")
        k_m = int(k_m)
        if k_m < 1:
            print "K too small. Try k > 0"
        elif k_m > data.shape[0]:
            print "K too large. Try k <= " + str(data.shape[0])

    means, new_classes = k_means(data, k_m)

    if k_m == 3:
        mappings, coded_classes, matches = k_means_test(data, means,
                                                        new_classes,
                                                        org_classes)
        data_writer.write_tests('out.data', "K-means", k_m, data,
                                coded_classes, org_classes, matches)
        print("Output of k-Means with test written to out.data file")
    else:
        data_writer.write_data('out.data', "K-means", k_m, data, new_classes)
        print("Output of k-Means written to out.data file")

    while k_n >= data.shape[0]:

        k_n = raw_input("Input number of k-NN: ")
        k_n = int(k_n)
        if k_n >= data.shape[0]:
            print "K too large. Try k < " + str(data.shape[0])

    new_element = []
    coo = 0

    print("Add coordinates of the new element")

    for x in range(data.shape[1]):

        coo = raw_input("Enter float for " + str(x + 1) + ". coordinate: ")
        if coo == "":
            coo = 0
        coo = float(coo)
        new_element.append(coo)

    new_element = np.array([new_element])
    new_class = k_nn(data, new_classes, k_n, new_element)

    if k_m == 3:
        new_class = mappings[new_class]

    new_class = np.array([[new_class]])

    data_writer.write_data('out.data', "K-NN", k_n, new_element, new_class)

    print("Output of k-NN written to out.data file")
def train():
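	# Train a character-level VDCNN on pos/neg data with gradient clipping and an
	# exponentially decaying learning rate, evaluating on a held-out dev split and
	# checkpointing at regular intervals.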
	# Data Preparation
	# Load data
	print("Loading data...")
	x, y = data_loader.read_data(FLAGS.pos_data, FLAGS.neg_data,
												  FLAGS.max_sequence_length)
	print("Data Size:", len(y))
	np.random.seed(10)
	shuffle_indices = np.random.permutation(np.arange(len(y)))
	x_shuffled = x[shuffle_indices]
	y_shuffled = y[shuffle_indices]

	dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
	x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
	y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

	del x, y, x_shuffled, y_shuffled
	print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

	num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1
	print("Loading data succees...")

	# ConvNet
	acc_list = [0]

	session_conf = tf.ConfigProto(
		allow_soft_placement=FLAGS.allow_soft_placement,
		log_device_placement=FLAGS.log_device_placement)
	session_conf.gpu_options.allow_growth = True
	# session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
	sess = tf.Session(config=session_conf)

	cnn = VDCNN(num_classes=y_train.shape[1],
		num_quantized_chars=FLAGS.vocab_size,
		depth=FLAGS.depth,
		sequence_max_length=FLAGS.max_sequence_length,
		downsampling_type=FLAGS.downsampling_type,
		use_he_uniform=FLAGS.use_he_uniform,
		optional_shortcut=FLAGS.optional_shortcut)

	# Optimizer and LR Decay
	update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
	with tf.control_dependencies(update_ops):
		global_step = tf.Variable(0, name="global_step", trainable=False)
		learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True)
		optimizer = tf.train.AdamOptimizer(learning_rate)
		gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
		gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
		train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)

	###
	# Output directory for models and summaries
	timestamp = str(int(time.time()))
	out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
	print("Writing to {}\n".format(out_dir))

	# Summaries for loss and accuracy
	loss_summary = tf.summary.scalar("loss", cnn.loss)
	acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

	# Train Summaries
	train_summary_op = tf.summary.merge([loss_summary, acc_summary])
	train_summary_dir = os.path.join(out_dir, "summaries", "train")
	train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

	# Dev summaries
	dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
	dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
	dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

	# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
	checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
	checkpoint_prefix = os.path.join(checkpoint_dir, "model")
	if not os.path.exists(checkpoint_dir):
		os.makedirs(checkpoint_dir)
	saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

	# Initialize Graph
	sess.run(tf.global_variables_initializer())

	# sess = tfdbg.LocalCLIDebugWrapperSession(sess)  # session wrapped by the debugger
	# sess.add_tensor_filter("has_inf_or_nan", tfdbg.has_inf_or_nan)  # register a tensor filter with the debugger

	# Train Step and Test Step
	def train_step(x_batch, y_batch):
		"""
		A single training step
		"""
		feed_dict = {cnn.input_x: x_batch,
					 cnn.input_y: y_batch,
					 cnn.is_training: True}
		_, step, summaries, loss, accuracy = sess.run([train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict)
		train_summary_writer.add_summary(summaries, step)
		time_str = datetime.datetime.now().isoformat()
		print("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}".format(time_str, step, int(step//num_batches_per_epoch)+1, loss, accuracy))
		#if step%FLAGS.evaluate_every == 0 and FLAGS.enable_tensorboard:
		#	summaries = sess.run(train_summary_op, feed_dict)
		#	train_summary_writer.add_summary(summaries, global_step=step)

	def test_step(x_batch, y_batch):
		"""
		Evaluates model on a dev set
		"""
		feed_dict = {cnn.input_x: x_batch,
					 cnn.input_y: y_batch,
					 cnn.is_training: False}
		summaries_dev, loss, preds, step = sess.run([dev_summary_op, cnn.loss, cnn.predictions, global_step], feed_dict)
		dev_summary_writer.add_summary(summaries_dev, step)
		time_str = datetime.datetime.now().isoformat()
		return preds, loss

	# Generate batches
	# train_batches = data_helper.batch_iter(list(zip(train_data, train_label)), FLAGS.batch_size, FLAGS.num_epochs)

	batches = data_loader.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

	# Training loop. For each batch...
	for train_batch in batches:
		x_batch, y_batch = zip(*train_batch)
		train_step(x_batch, y_batch)
		current_step = tf.train.global_step(sess, global_step)
		# Testing loop
		if current_step % FLAGS.evaluate_every == 0:
			print("\nEvaluation:")
			i = 0
			index = 0
			sum_loss = 0
			test_batches = data_loader.batch_iter(list(zip(x_dev, y_dev)), FLAGS.batch_size, 1, shuffle=False)
			y_preds = np.ones(shape=len(y_dev), dtype=np.int)
			for test_batch in test_batches:
				x_test_batch, y_test_batch = zip(*test_batch)
				preds, test_loss = test_step(x_test_batch, y_test_batch)
				sum_loss += test_loss
				res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
				y_preds[index:index+len(res)] = res
				i += 1
				index += len(res)

			time_str = datetime.datetime.now().isoformat()
			acc = np.count_nonzero(y_preds==0)/len(y_preds)
			acc_list.append(acc)
			print("{}: Evaluation Summary, Loss {:g}, Acc {:g}".format(time_str, sum_loss/i, acc))
			print("{}: Current Max Acc {:g} in Iteration {}".format(time_str, max(acc_list), int(acc_list.index(max(acc_list))*FLAGS.evaluate_every)))

		if current_step % FLAGS.checkpoint_every == 0:
			path = saver.save(sess, checkpoint_prefix, global_step=current_step)
			print("Saved model checkpoint to {}\n".format(path))
Example #14
            best_acc = switch_acc
        #writer.add_scalar('mask_accuracy', mask_acc, epoch_i)
        #writer.add_scalar('avg_mask_loss', mask_loss/total_batch, epoch_i)
        #writer.add_scalar('replace_accuracy', replace_acc, epoch_i)
        #writer.add_scalar('avg_replace_loss', replace_loss/total_batch, epoch_i)
        writer.add_scalar('switch_accuracy', switch_acc, epoch_i)
        writer.add_scalar('avg_switch_loss', switch_loss / total_batch,
                          epoch_i)
        #writer.add_scalar('sorter_accuracy', sorter_acc, epoch_i)
        #writer.add_scalar('avg_sorter_loss', sorter_loss/total_batch, epoch_i)


if __name__ == '__main__':
    train_data, dev_data, test_data = \
        get_train_dev_test_data(keep_single_sent=False)
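    # Augment the training data with the Newsroom train and dev sources before training.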
    newsroom_train = read_data('data/summarization/newsroom/train.txt.src',
                               False, False)
    newsroom_dev = read_data('data/summarization/newsroom/dev.txt.src', False,
                             False)
    train_data += newsroom_train
    train_data += newsroom_dev
    #print(train_data[0])
    #print(dev_data[0])
    #print(test_data[0])
    #my_vocab = build_vocab([train_data, dev_data, test_data])
    my_vocab = load_vocab()
    #train_target = read_target(train_tgt_file)
    #dev_target = read_target(dev_tgt_file)
    train_target = None
    dev_target = None
    train(train_data, dev_data, my_vocab, train_target, dev_target)
Example #15
from __future__ import print_function
from data_loader import read_data
import math


header, training_data = read_data("DTree.csv")
totalEntropy = 0

#def Class Node:
 #   def __init__(self,attribute)

#returns unique values for a col
def giveUnique(rows, col):
    return list(set(row[col] for row in rows))

#returns counts of class labels for each value of a column
def getCounts(rows, col):
    counts = {}
    for row in rows:
        if row[col] not in counts:
            counts[row[col]] = [0,0]
        if row[-1] == 'yes':
            counts[row[col]][0] += 1
        else:
            counts[row[col]][1] += 1
    return counts

def fullEntropy(rows):
    counts = {}
    labels = giveUnique(rows, -1)
    for label in labels:
Example #16
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    
    return node        
    
def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s

def print_tree(node, level):
    if node.answer != "":
        print empty(level), node.answer
        return
        
    print empty(level), node.attribute
    
    for value, n in node.children:
        print empty(level + 1), value
        print_tree(n, level + 2)
        

metadata, traindata = read_data("stock.data")

data = np.array(traindata)

node = create_node(data, metadata)
    
print_tree(node, 0)
Example #17
        node.children.append((items[x], child))

    return node


def empty(size):
    s = ""
    for x in range(size):
        s += "   "
    return s


def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)

    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)


metadata, traindata = read_data("playtennis.data")

data = np.array(traindata)

node = create_node(data, metadata)

print_tree(node, 0)
Example #18
def train(mode):
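    # Train an APCNN on (query, positive, negative) triples, optionally restoring from
    # FLAGS.model_path, and save a checkpoint every FLAGS.checkpoint_every steps.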
    print "Loading data..."
    data = data_loader.read_data(FLAGS.train_file, FLAGS.max_sequence_length)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = APCNN(FLAGS, mode)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", FLAGS.model_name,
                             timestamp))
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            restore = FLAGS.restore_model
            if restore:
                saver.restore(sess, FLAGS.model_path)
                print(
                    "*" * 20 + "\nReading model parameters from %s \n" %
                    FLAGS.model_path + "*" * 20)
            else:
                print("*" * 20 + "\nCreated model with fresh parameters.\n" +
                      "*" * 20)

            def train_step(q_batch, pos_batch, neg_batch, epoch):
                """
                A single training step
                """

                feed_dict = {
                    cnn.usrq: q_batch,
                    cnn.pos: pos_batch,
                    cnn.neg: neg_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.is_training: True
                }

                _, step, loss = sess.run(
                    [cnn.update, cnn.global_step, cnn.loss], feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print "{}: Epoch {} step {}, loss {:g}".format(
                    time_str, epoch, step, loss)

            # Generate batches
            batches = data_loader.batch_iter(data, FLAGS.batch_size,
                                             FLAGS.max_epoch, True)

            num_batches_per_epoch = int((len(data)) / FLAGS.batch_size) + 1

            # Training loop. For each batch...
            epoch = 0
            for batch in batches:
                q_batch = batch[:, 0]
                pos_batch = batch[:, 1]
                neg_batch = batch[:, 2]
                train_step(q_batch, pos_batch, neg_batch, epoch)
                current_step = tf.train.global_step(sess, cnn.global_step)

                if current_step % num_batches_per_epoch == 0:
                    epoch += 1

                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    params['lr'] = 1e-3
    params['batch_size'] = 256
    params['epoch_limit'] = 10
    params['w_decay'] = 1.
    params['negNum_test'] = 1000
    params['epsilon'] = 1e-4
    params['negNum_train'] = 2
    params['l_size'] = 16
    params['train_device'] = 'cuda'
    params['test_device'] = 'cuda'
    params['lambda'] = 1.
    params['test_per_train'] = 2

    category = 'Moivelens1M'

    train, test = data_loader.read_data(category)
    userNum, itemNum = data_loader.get_datasize(category)
    frequency = data_loader.get_distribution(category)
    distribution = data_loader.approx_Gaussian(frequency)

    trainset = data_loader.TransactionData(train, userNum, itemNum,
                                           distribution)
    trainLoader = DataLoader(trainset,
                             batch_size=params['batch_size'],
                             shuffle=False,
                             num_workers=0)

    testset = data_loader.UserTransactionData(test, userNum, itemNum,
                                              trainset.userHist)
    testset.set_negN(params['negNum_test'])
    testLoader = DataLoader(testset,