Example No. 1
def main():
    input_data, output_file = utils.IO_files()

    if not utils.check_data(input_data):
        utils.generate_data(output_file)

    output_file.close()
def indexed_comparison(input_file, idf_file, optimal_cosine_score = 0.2):
    term_freq_dict = {}
    articles = {}
    cosine_tfidf_instance = Cosine_tfidf(idf_file)
    for article_a_id, article_a_text in utils.generate_data(input_file):

        # calculate current article's term frequency and store
        article_a_term_freq = calculate_term_frequencies(article_a_text)
        articles[article_a_id] = article_a_term_freq

        current_cosine_score = 0
        best_article_id = 0
        # loop back through all the previous articles
        for article_b_id in articles:
            if article_b_id == article_a_id:
                continue

            article_b_term_freq = articles[article_b_id]
            new_cosine_score = cosine_tfidf_instance.calculate_cosine_tfidf(article_a_term_freq, article_b_term_freq)

            if new_cosine_score > current_cosine_score:
                best_article_id = article_b_id
                current_cosine_score = new_cosine_score

        if current_cosine_score > optimal_cosine_score:
            yield article_a_id, best_article_id
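
The helpers calculate_term_frequencies and Cosine_tfidf are not shown in this example. A minimal, hypothetical sketch of the term-frequency step (the tokenisation details are assumptions) could look like:

from collections import Counter

def calculate_term_frequencies(text):
    # Naive whitespace tokenisation; a real implementation would also strip punctuation and stop words.
    tokens = text.lower().split()
    counts = Counter(tokens)
    total = sum(counts.values()) or 1
    # Relative term frequency per token
    return {term: count / total for term, count in counts.items()}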
Example No. 3
    def build_corpus_sample(K, T, W, D, M, a=1, b=1, tau=4):
        '''
        Builds a toy Corpus dataset.
        Generates toy data using the generative model of MixEHR.
        :param K: number of topics
        :param T: number of types
        :param W: number of words in the vocabulary
        :param D: number of documents
        :param M: number of words. We assume that all documents have same length.
        :param a: shape parameter of gamma distribution; used to sample the hyper-parameters
        :param b: scale parameter of gamma distribution; used to sample the hyper-parameters
        :param tau: mean used to sample w
        :return: y: response; b: types; x: words; z: topic assignments; g: response
        '''
        y, b, x, z, g, theta = generate_data(K, T, W, D, M, a, b, tau)
        dataset = []
        C = 0
        for i in range(D):
            cnt = Counter()
            len(b[:, i])
            patient = Corpus.Patient(i, i, y[i])
            for batch in zip(b[:, i], x[:, i]):
                cnt[batch] += 1
            for type, word in cnt:
                freq = cnt[(type, word)]
                patient.append_record(type, word, freq)
                C += freq
            dataset.append(patient)
        corpus = Corpus(dataset, T, W, C)
        corpus.z = z
        corpus.g = g

        return corpus, theta
Example No. 4
def indexed_comparison(input_file, idf_file, optimal_cosine_score=0.2):
    term_freq_dict = {}
    articles = {}
    cosine_tfidf_instance = Cosine_tfidf(idf_file)
    for article_a_id, article_a_text in utils.generate_data(input_file):

        # calculate current article's term frequency and store
        article_a_term_freq = calculate_term_frequencies(article_a_text)
        articles[article_a_id] = article_a_term_freq

        current_cosine_score = 0
        best_article_id = 0
        # loop back through all the previous articles
        for article_b_id in articles:
            if article_b_id == article_a_id:
                continue

            article_b_term_freq = articles[article_b_id]
            new_cosine_score = cosine_tfidf_instance.calculate_cosine_tfidf(
                article_a_term_freq, article_b_term_freq)

            if new_cosine_score > current_cosine_score:
                best_article_id = article_b_id
                current_cosine_score = new_cosine_score

        if current_cosine_score > optimal_cosine_score:
            yield article_a_id, best_article_id
Example No. 5
def main():
    args = parser.parse_args()
    model_path = args.model
    dataset_size = args.size
    batch_size = args.batch_size
    backend_name = args.backend
    print_freq = args.print_freq

    # Load ONNX model
    onnx_protobuf = onnx.load(model_path)
    # Change batch size defined in model to value passed by user as argument
    onnx_protobuf.graph.input[0].type.tensor_type.shape.dim[0].dim_value = batch_size

    ng_model = import_onnx_model(onnx_protobuf)
    model_batch, model_channels, model_height, model_width = ng_model.get_parameters()[0].shape

    # Generate synthetic dataset filled with random values
    dataset = generate_data(count=dataset_size,
                            batch_size=model_batch,
                            image_channels=model_channels,
                            image_height=model_height,
                            image_width=model_width)
    dataset = [(img, 0) for img in dataset]

    perf_metrics = evaluate(backend_name, ng_model, dataset, batch_size, print_freq)
    save_results('results/', args.output_file, {key: val.data for key, val in perf_metrics.items()})
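
generate_data here is expected to return dataset_size random images shaped like the model input; a minimal sketch under that assumption (the real helper is not shown in this example):

import numpy as np

def generate_data(count, batch_size, image_channels, image_height, image_width):
    # One random NCHW batch per entry, matching the ONNX model's input layout.
    return [np.random.rand(batch_size, image_channels, image_height, image_width).astype(np.float32)
            for _ in range(count)]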
Example No. 6
def main():
    features, cases, variance = 5, 10000, 0
    x, y, thetas = generate_data(features, cases, variance)
    print(f'Generated thetas: {thetas}')

    thetas = normal_equation(x, y)
    print(f'Calculated thetas: {thetas}')
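
normal_equation is not shown in this example. Assuming x is the design matrix (with a bias column) and y the target vector, the closed-form least-squares solution is a one-liner; a sketch:

import numpy as np

def normal_equation(x, y):
    # theta = (X^T X)^+ X^T y, using the pseudo-inverse for numerical robustness
    return np.linalg.pinv(x.T @ x) @ x.T @ y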
Example No. 7
def create_application():
    app = Flask(__name__)
    app.config.from_object(Config())
    app.logger.removeHandler(default_handler)

    generate_data(app.config['JSON_TMP_FILE'])

    from controllers.guids import guids_blueprint
    app.register_blueprint(guids_blueprint)

    simple_errors = (400, 401, 404, 403)

    def simple_error(e):
        return jsonify(error=e.code, message=e.description), e.code

    for error in simple_errors:
        app.errorhandler(error)(simple_error)

    return app
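
A typical way to use this application factory is from a small entry-point script; the module name below is an assumption for illustration:

# run.py (hypothetical entry point)
from app import create_application  # module name is an assumption

app = create_application()

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)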
Example No. 8
def test_PointerSeq2Seq_TSP():
    ''' 
        The data has been generated from https://github.com/vyraun/Keras-Pointer-Network.
            A sample in X looks like [(8, 0), (2, 8), (6, 9), (9, 8), (7, 5), (0, 5), (4, 6), (8, 2), (5, 2), (4, 9), (5, 0)]
            A sample in Y looks like [0, 1, 3, 9, 7, 8, 5, 4, 2, 10, 6]
    '''
    X = []
    Y = []
    for _ in xrange(0, tsp_samples):
        X.append(utils.generate_data(tsp_input_length))
    for samples in X:
        solution = utils.Tsp(samples)
        Y.append(solution.solve_tsp_dynamic())
    '''
        One hot encoding for the output symbols.
    '''
    one_hot_matrix = np.eye(tsp_input_length)
    Y = [[one_hot_matrix[sample[x]] for x in range(len(sample))]
         for sample in Y]

    # pprint(X[0])
    # pprint(Y[0])
    # raw_input()

    #Transmuting the data into Numpy arrays
    X = np.asarray(X) / 10.0
    Y = np.asarray(Y)

    x_train, x_test = X[:int(X.shape[0] * .80)], X[int(X.shape[0] * .80):]
    y_train, y_test = Y[:int(Y.shape[0] * .80)], Y[int(Y.shape[0] * .80):]

    print "Done making dummy data"
    print "tsp_input_length", tsp_input_length, "sd", tsp_input_dim
    models = Pointer(output_dim=tsp_output_dim,
                     hidden_dim=tsp_hidden_dim,
                     output_length=tsp_output_length,
                     input_shape=(tsp_input_length, tsp_input_dim),
                     batch_size=10,
                     bidirectional=False)
    print "Done creating model"

    # models.compile(loss='mse', optimizer='fast_compile')
    models.compile(loss='mse', optimizer='sgd')
    print models.summary()
    models.fit(X, Y, epochs=10, batch_size=10)
    print "Done fitting model"

    print "Done everything master"
    while True:
        cmd = raw_input(
            "Master, please give Dobby a sock now. (Just write sock)")
        if cmd.lower() == "sock":
            break
        print "Master, why must thy be so cruel."
        print "Let's try that again."
Example No. 9
def build_fullyconnected(norm=np.inf, nhidden=5):

    # generate data (in a box)
    X, y = generate_data(norm=norm)

    # build network
    L1 = Layer((nhidden, 2))
    L2 = Layer((1, nhidden))
    net = Network(X, y, [L1, L2])

    return net
def main():
	features, cases, variance = 5, 10000, 0
	alpha, iterations, epsilon = 0.000000005, 100000, 0.000001

	x, y, generated_thetas = generate_data(features, cases, variance)
	calculated_thetas, costs = gradient_descent(
		x, y, alpha, features, cases, iterations, epsilon
	)

	plot_costs(costs)
	print(f'Generated thetas: {generated_thetas}')
	print(f'Calculated thetas: {calculated_thetas}')
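
gradient_descent is an assumed helper; a minimal batch gradient-descent sketch matching this call signature (the project's real implementation may differ):

import numpy as np

def gradient_descent(x, y, alpha, features, cases, iterations, epsilon):
    # x: (cases, features) design matrix, y: (cases,) targets
    thetas = np.zeros(features)
    costs = []
    for _ in range(iterations):
        error = x @ thetas - y
        costs.append((error @ error) / (2 * cases))
        thetas -= alpha * (x.T @ error) / cases
        # stop once the improvement in cost falls below epsilon
        if len(costs) > 1 and abs(costs[-2] - costs[-1]) < epsilon:
            break
    return thetas, costs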
Example No. 11
def test_PointerSeq2Seq_TSP():
    ''' 
        The data has been generated from https://github.com/vyraun/Keras-Pointer-Network.
            A sample in X looks like [(8, 0), (2, 8), (6, 9), (9, 8), (7, 5), (0, 5), (4, 6), (8, 2), (5, 2), (4, 9), (5, 0)]
            A sample in Y looks like [0, 1, 3, 9, 7, 8, 5, 4, 2, 10, 6]
    '''
    X = []
    Y = []
    for _ in xrange(0,tsp_samples):
        X.append(utils.generate_data(tsp_input_length))
    for samples in X:
        solution = utils.Tsp(samples)
        Y.append(solution.solve_tsp_dynamic())

    '''
        One hot encoding for the output symbols.
    '''
    one_hot_matrix = np.eye(tsp_input_length)
    Y = [[ one_hot_matrix[sample[x]] for x in range(len(sample)) ] for sample in Y ]

    # pprint(X[0])
    # pprint(Y[0])
    # raw_input()

    #Transmuting the data into Numpy arrays
    X = np.asarray(X)/10.0
    Y = np.asarray(Y)

    x_train,x_test = X[:int(X.shape[0]*.80)],X[int(X.shape[0]*.80):]
    y_train,y_test = Y[:int(Y.shape[0]*.80)],Y[int(Y.shape[0]*.80):]
        
    print "Done making dummy data"
    print "tsp_input_length", tsp_input_length, "sd", tsp_input_dim
    models = Pointer(output_dim=tsp_output_dim, hidden_dim=tsp_hidden_dim, output_length=tsp_output_length, input_shape=(tsp_input_length, tsp_input_dim), batch_size=10,bidirectional=False)
    print "Done creating model"

    # models.compile(loss='mse', optimizer='fast_compile')
    models.compile(loss='mse', optimizer='sgd')
    print models.summary()
    models.fit(X, Y, epochs=10,batch_size=10)
    print "Done fitting model"

    print "Done everything master"
    while True:
        cmd = raw_input("Master, please give Dobby a sock now. (Just write sock)")
        if cmd.lower() == "sock":
            break
        print "Master, why must thy be so cruel."   
        print "Let's try that again."
def run_k_means_gmm():
    k = 3
    num_samples = 1000
    x_limits = [0,10]
    y_limits = [0,10]
    data, means = utils.generate_data(k=k, num_samples=num_samples, x_limits=x_limits, y_limits=y_limits)

    # plot generated data
    utils.plot_data_k(data, k, means)

    # run k-means a few times and take best
    init_means, assignments, best_dist = initialize(data, k, num_runs=10)

    # plot results
    print 'total euclidean distance: {}'.format(best_dist)
    utils.plot_data_assigments(data, init_means, assignments)
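
utils.generate_data is not shown; a plausible sketch that draws k Gaussian clusters inside the given limits (cluster spread and return format are assumptions):

import numpy as np

def generate_data(k, num_samples, x_limits, y_limits):
    # Pick k random cluster centres inside the box, then scatter points around them.
    means = np.column_stack((np.random.uniform(x_limits[0], x_limits[1], size=k),
                             np.random.uniform(y_limits[0], y_limits[1], size=k)))
    assignments = np.random.randint(k, size=num_samples)
    data = means[assignments] + np.random.randn(num_samples, 2) * 0.5
    return data, means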
Example No. 13
def train():
    model.train()
    losses = list()
    for batch_id, batch in enumerate(
            generate_data(text_as_int, batch_size, seq_len)):
        src, trg = batch
        src = src.permute(1, 0).to(device)
        trg = trg.permute(1, 0).to(device)
        optimizer.zero_grad()
        preds = model(src)
        loss = criterion(preds.contiguous().view(-1, vocab_size),
                         trg.contiguous().view(-1))
        losses.append(loss.item())
        avg_loss = sum(losses) / len(losses)
        loss.backward()
        optimizer.step()
        if batch_id % 10 == 0:
            print(f'epoch: {epoch} | loss: {avg_loss:.4f}')
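
generate_data is assumed to yield (src, trg) batches of shape (batch_size, seq_len) over the integer-encoded text, with trg shifted one token ahead of src; a minimal sketch under that assumption:

import torch

def generate_data(text_as_int, batch_size, seq_len):
    # Cut the encoded text into contiguous (input, target) windows, the target shifted by one token.
    window = batch_size * seq_len
    for start in range(0, len(text_as_int) - window - 1, window):
        chunk = torch.as_tensor(text_as_int[start:start + window + 1], dtype=torch.long)
        yield chunk[:-1].view(batch_size, seq_len), chunk[1:].view(batch_size, seq_len)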
Example No. 14
def cw_attack(file_name, norm, sess, num_image=10, cifar = False, tinyimagenet = False):
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    if norm == '1':
        attack = EADL1
        norm_fn = lambda x: np.sum(np.abs(x),axis=(1,2,3))
    elif norm == '2':
        attack = CarliniL2
        norm_fn = lambda x: np.sum(x**2,axis=(1,2,3))
    elif norm == 'i':
        attack = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x),axis=(1,2,3))

    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()
    model = load_model(file_name, custom_objects={'fn':loss,'tf':tf, 'ResidualStart' : ResidualStart, 'ResidualStart2' : ResidualStart2})
    inputs, targets, true_labels, true_ids, img_info = generate_data(data, samples=num_image, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.predict, start=0)
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1
        
    
    start_time = timer.time()
    attack = attack(sess, model, max_iterations = 1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input-inputs))
    return UB, (timer.time()-start_time)/len(inputs)
Example No. 15
def train(bob_or_eve, results, max_iters, print_every, es=0., es_limit=100):
    count = 0
    for i in range(max_iters):
        
        msg_in_val, key_val = generate_data()
        
        if bob_or_eve == 'bob':
            loss = train_bob(msg_in_val, key_val)
            results = np.hstack((results, error_bob(msg_in_val, key_val).sum()))
        elif bob_or_eve == 'eve':
            loss = train_eve(msg_in_val, key_val)
            results = np.hstack((results, error_eve(msg_in_val, key_val).sum()))
        
        if i % print_every == 0:
            print 'training loss:', loss
        
        if es and loss < es:
            count += 1
            if count > es_limit:
                break
    return np.hstack((results, np.repeat(results[-1], max_iters - i - 1)))
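
generate_data here follows the usual adversarial-neural-cryptography setup and returns a batch of random plaintext messages plus matching keys; a hedged sketch (the batch size and bit lengths are assumptions):

import numpy as np

def generate_data(batch_size=512, msg_len=16, key_len=16):
    # Random -1/+1 bit vectors for plaintext messages and shared keys.
    msg = np.random.randint(0, 2, size=(batch_size, msg_len)) * 2 - 1
    key = np.random.randint(0, 2, size=(batch_size, key_len)) * 2 - 1
    return msg.astype(np.float32), key.astype(np.float32)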
Example No. 16
def create_feature_list():
    """ Generate the list of EEG feature names.

        Args:
            None

        Returns:
            The feature names (the keys of the generated feature row).
    """
    # generate test data
    eeg, user, classlabel, name = utils.generate_data()
    
    # only use a small fraction of the data
    sig_len = features.SIGNAL_LENGTH // SIGNAL_LENGTH_REDUCE_FACTOR  # avoid shadowing the builtin len
    eeg = eeg[:sig_len, :]

    # generate features
    row = OrderedDict()
    features.extract_all_eeg_features(eeg, row, sig_len, "")

    return row.keys()
Example No. 17
def main():
    # Step 1: Generate and visualize training data
    X_train, Y_train, X_test, Y_test = generate_data(3, 5, train_set_ratio=0.9)
    visualize_data(X_train, Y_train)
    n_samples = len(X_train)

    # Step 1b: Normalize Xs and re-visualize training data
    X_train, mean_train, std_train = normalize_feature(X_train, mode='train')
    X_test = normalize_feature(X_test, mode='test',
                               mean=mean_train, std=std_train)
    visualize_data(X_train, Y_train, viz_trainining=True)

    # Step 2: Initialize Placeholders for input data
    X = tf.placeholder(shape=[None], dtype=tf.float32, name='X')
    Y = tf.placeholder(shape=[None], dtype=tf.float32, name='Y')

    # Step 3: Build up your model graph
    a_sym, b_sym, = define_parameters()
    cost = define_cost_func(X, Y, a_sym, b_sym, n_samples)

    # Step 4: Create optimizer op and initializer op
    learning_rate = 0.03
    optimizer, initializer = define_optimizer(learning_rate, cost)
    # tf.summary.scalar('cost', cost_tensor)
    with tf.Session() as sess:
        sess.run(initializer)
        for i in range(400):
            _, cost_train = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train})
            a, b = sess.run([a_sym, b_sym])
            cost_test = sess.run(cost, feed_dict={X: X_test, Y: Y_test})
            print('a=', a, 'b=', b)
            print('Training Cost =', cost_train, "\tTesting Cost =", cost_test)
            plt.plot(X_train, Y_train, 'bo')
            draw_model(a, b)
            plt.pause(0.1)

    print('Optimized variable: a = ', a)
    print('Optimized variable: b = ', b)
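
generate_data(3, 5, train_set_ratio=0.9) presumably samples noisy points from the line y = 3x + 5 and splits them into train and test sets; a minimal sketch under that assumption:

import numpy as np

def generate_data(a, b, n_samples=200, train_set_ratio=0.9):
    # y = a*x + b plus Gaussian noise, split into train and test portions.
    x = np.random.uniform(-10.0, 10.0, n_samples).astype(np.float32)
    y = (a * x + b + np.random.normal(0.0, 1.0, n_samples)).astype(np.float32)
    split = int(n_samples * train_set_ratio)
    return x[:split], y[:split], x[split:], y[split:]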
Example No. 18
""" 
@Author:yanqiang 
@File: sentiment_classidication.py 
@Time: 2018/9/28 11:14
@Software: PyCharm 
@Description:
"""

import numpy as np
import pandas as pd
import lightgbm as lgb
from utils import generate_data,word_seg
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score

X,y_sub,y_sent,X_submit,labels_subject=generate_data(use_sina=True)
# Remap the class labels
sent_labels={-1:0,0:1,1:2}
labels_sent={0:-1,1:0,2:1}
y_sent=[sent_labels[i] for i in y_sent.tolist()]

X_train_sent,X_test_sent,y_train_sent,y_test_sent=train_test_split(X,y_sent,random_state=42)


def train():
    # Subject (topic) classification
    lgb_train_sub=lgb.Dataset(X_train_sent,y_train_sent)
    lgb_eval_sub=lgb.Dataset(X_test_sent,y_test_sent)

    params_sub= {
        'task': 'train',
def main(_):
    a = datetime.datetime.now()

    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    if not os.path.exists(FLAGS.checkpoint_par_dir):
        os.makedirs(FLAGS.checkpoint_par_dir)

    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)

    test_cases = [{
        'id': 'OI_11_00',
        'alpha': 1.0,
        'beta': 1.0,
        'delta_v': 0.0,
        'delta_m': 0.0
    }, {
        'id': 'OI_11_11',
        'alpha': 1.0,
        'beta': 1.0,
        'delta_v': 0.1,
        'delta_m': 0.1
    }, {
        'id': 'OI_11_22',
        'alpha': 1.0,
        'beta': 1.0,
        'delta_v': 0.2,
        'delta_m': 0.2
    }, {
        'id': 'OI_101_00',
        'alpha': 1.0,
        'beta': 0.1,
        'delta_v': 0.0,
        'delta_m': 0.0
    }, {
        'id': 'OI_101_11',
        'alpha': 1.0,
        'beta': 0.1,
        'delta_v': 0.1,
        'delta_m': 0.1
    }, {
        'id': 'OI_101_22',
        'alpha': 1.0,
        'beta': 0.1,
        'delta_v': 0.2,
        'delta_m': 0.2
    }, {
        'id': 'OI_1001_00',
        'alpha': 1.0,
        'beta': 0.01,
        'delta_v': 0.0,
        'delta_m': 0.0
    }, {
        'id': 'OI_1001_11',
        'alpha': 1.0,
        'beta': 0.01,
        'delta_v': 0.1,
        'delta_m': 0.1
    }, {
        'id': 'OI_1001_22',
        'alpha': 1.0,
        'beta': 0.01,
        'delta_v': 0.2,
        'delta_m': 0.2
    }]

    found = False
    for case in test_cases:
        if case['id'] == FLAGS.test_id:
            found = True
            FLAGS.alpha = case['alpha']
            FLAGS.beta = case['beta']
            FLAGS.delta_m = case['delta_m']
            FLAGS.delta_v = case['delta_v']

            print(case)

    if not found:
        print("Using OI_11_00")
        FLAGS.test_id = "OI_11_00"
        FLAGS.alpha = 1.0
        FLAGS.beta = 1.0
        FLAGS.delta_m = 0.0
        FLAGS.delta_v = 0.0

    FLAGS.input_height = 7
    FLAGS.input_width = 7
    FLAGS.output_height = 7
    FLAGS.output_width = 7

    if FLAGS.shadow_gan:
        checkpoint_folder = FLAGS.checkpoint_par_dir + '/' + FLAGS.dataset + "/" + 'atk_' + FLAGS.test_id
    else:
        checkpoint_folder = f'{FLAGS.checkpoint_par_dir}/{FLAGS.dataset}/{FLAGS.test_id}'

    if not os.path.exists(checkpoint_folder):
        os.makedirs(checkpoint_folder)

    FLAGS.checkpoint_dir = checkpoint_folder

    pp.pprint(flags.FLAGS.__flags)
    print(FLAGS.y_dim)

    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    print("Checkpoint : " + FLAGS.checkpoint_dir)

    with tf.Session(config=run_config) as sess:
        tablegan = TableGan(sess,
                            input_width=FLAGS.input_width,
                            input_height=FLAGS.input_height,
                            output_width=FLAGS.output_width,
                            output_height=FLAGS.output_height,
                            batch_size=FLAGS.batch_size,
                            sample_num=FLAGS.batch_size,
                            y_dim=FLAGS.y_dim,
                            dataset_name=FLAGS.dataset,
                            crop=FLAGS.crop,
                            checkpoint_dir=FLAGS.checkpoint_dir,
                            sample_dir=FLAGS.sample_dir,
                            alpha=FLAGS.alpha,
                            beta=FLAGS.beta,
                            delta_mean=FLAGS.delta_m,
                            delta_var=FLAGS.delta_v,
                            label_col=FLAGS.label_col,
                            attrib_num=FLAGS.attrib_num,
                            is_shadow_gan=FLAGS.shadow_gan,
                            test_id=FLAGS.test_id)

        show_all_variables()

        if FLAGS.train:
            tablegan.train(FLAGS)

        else:

            if not tablegan.load(FLAGS.checkpoint_dir)[0]:
                raise Exception("[!] Train a model first, then run test mode")

            # Below is code for visualization
            if FLAGS.shadow_gan:  # using Discriminator sampler for Membership Attack
                OPTION = 5
            else:
                print("Generating data...")
                OPTION = 1

            generate_data(sess, tablegan, FLAGS, OPTION)

            print('Time Elapsed: ')
            b = datetime.datetime.now()
            print(b - a)
                [np.log(pis[k]) + mvn.logpdf(X[n, :], mus[k, :], Sigmas[k, :, :]) 
                 for k in range(K) for n in range(N)])
    log_probs = np.reshape(log_probs_flat, (K, N)).T
    L = np.sum(logsumexp(log_probs, axis=1))
    return L
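
The helper above is truncated at the top of the snippet; a self-contained version of this GMM log-likelihood, assuming SciPy's multivariate_normal and logsumexp and a hypothetical function name, could read:

import numpy as np
from scipy.stats import multivariate_normal as mvn
from scipy.special import logsumexp

def log_likelihood(X, pis, mus, Sigmas):
    # Sum over points of log sum_k pi_k N(x_n | mu_k, Sigma_k), computed via logsumexp for stability.
    N, K = X.shape[0], len(pis)
    log_probs_flat = np.array(
        [np.log(pis[k]) + mvn.logpdf(X[n, :], mus[k, :], Sigmas[k, :, :])
         for k in range(K) for n in range(N)])
    log_probs = np.reshape(log_probs_flat, (K, N)).T
    return np.sum(logsumexp(log_probs, axis=1))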
    
    
from utils import generate_parameters, generate_data 
from plotting import double_panel_demo

if __name__ == '__main__':
    K = 3
    N = 100
    num_its = 16
    
    X = generate_data(N, generate_parameters(K))[0]
    plt = double_panel_demo(K)
    
    while True:
        X = generate_data(N, generate_parameters(K))[0]
        plt.set_new_lims(X, num_its)
        params = generate_parameters(K)
        # these initial parameters are an independent draw from the prior  
        
        objective = []    
        
        plt.cla('ax1')
        plt.cla('ax2')
        plt.plot_points_black(X)
        plt.draw()
        plt.pause(2.)
Example No. 21
            print(f"Please enter god_separation with {d} dimensions:")
            god_separation = list(map(int, input().split()))
            assert len(god_separation) == d, f"Length of god_separation is not equal to {d}."

            print(
                "Please enter [lower bound, upper bound) of points (e.g. \'-100 100\'):"
            )
            lb, ub = list(map(int, input().split()))
            assert lb < ub and lb * ub < 0, f"The input lower bound {lb} and upper bound {ub} are not valid."
        else:
            raise ValueError(f"Input {use_given_config} is not valid.")
        output_path = generate_data(n,
                                    d,
                                    god_separation,
                                    lower_bound=lb,
                                    upper_bound=ub,
                                    data_path=output_path)
        plot_dataset(output_path)  # plot points from data set
        data_iter = get_iter(output_path)
        w = main_loop(data_iter, w, n, d, ub, lb)  # added two parameters ub lb
        print(f"Finish in {time.time()-st:.2f}s.")
        print(f"Target plane: {god_separation}")
    else:
        n, d, ub, lb = plot_dataset(input_path)
        print(f"Dataset size: {n} dimensions: {d}")
        with open(input_path) as data_iter:
            w = main_loop(data_iter, w, n, d, ub,
                          lb)  # added 4 parameters n, d, ub, lb
        print(f"Finish in {time.time()-st:.2f}s.")
    print(f"Found separation plane: {w}")
Example No. 22
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context

# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None


# generate data
print ">> Generating dataset..."
data = generate_data(1000) # np.random.randint(2, size=(10000, n_visible))
data_context = get_context(data, N=1) # keep the number of dimensions low

data_train = data[:-1000, :]
data_eval = data[-1000:, :]
data_context_train = data_context[:-1000, :]
data_context_eval = data_context[-1000:, :]

n_visible = data.shape[1]
n_context = data_context.shape[1]
n_hidden = 20
n_factors = 50

print ">> Constructing RBM..."
numpy_rng = np.random.RandomState(123)
Example No. 23
        print("Evaluating", modelfile)
        sys.stdout.flush()

        random.seed(args.seed)
        np.random.seed(args.seed)
        tf.set_random_seed(args.seed)

        # the weights and bias are saved in lists: weights and bias
        # weights[i-1] gives the ith layer of weight and so on
        weights, biases = get_weights_list(model)

        inputs, targets, true_labels, true_ids, img_info = generate_data(
            data,
            samples=data.test_labels.shape[0],
            total_images=args.numimage,
            targeted=targeted,
            random_and_least_likely=True,
            force_label=force_label,
            target_type=target_type,
            predictor=model.model.predict,
            start=args.startimage)
        # get the logit layer predictions
        preds = model.model.predict(inputs)

        task_input = locals()
        task_module = __import__("task_" + args.task)
        task = task_module.task(**task_input)

        # warmup
        if args.warmup:
            print("warming up...")
            task.warmup()
Example No. 24
# Network predictions
pred_out = RNN(x, W, b, num_hidden_units, seq_max_len, seqLen)
# pred_out = LSTM(x, W, b, num_hidden_units, seq_max_len, seqLen)

# Define the loss function (i.e. mean-squared error loss) and optimizer
cost = tf.reduce_mean(tf.square(pred_out - y))
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Creating the op for initializing all variables
init = tf.global_variables_initializer()

# ==========
#  TOY DATA
# ==========
x_train, y_train, seq_len_train = generate_data(count=1000, max_length=seq_max_len, dim=input_dim)
x_test, y_test, seq_len_test = generate_data(count=5, max_length=seq_max_len, dim=input_dim)

# x_test = np.array([[[1], [2], [3], [4]],
#                     [[1], [2], [0], [0]],
#                    [[4], [5], [3], [9]]])
# seq_len_test = np.array([4, 2, 4])
# y_test = np.array([[10], [3], [21]])
# ==========

# Launch the graph (session)
with tf.Session() as sess:
    sess.run(init)
    print('----------Training---------')
    for i in range(training_steps):
        x_batch, y_batch, seq_len_batch = next_batch(x_train, y_train, seq_len_train, batch_size)
Example No. 25
              batch_first=True,
              independent_linears=False,
              copy_mode=copy_mode)

    rnn.load_state_dict(torch.load(current_model))

    # Execute the evaluation
    sigm = T.nn.Sigmoid()

    sequence_length -= 1

    for i in tqdm(range(0, args.iterations)):

        x, y, _ = generate_data(1,
                                sequence_length,
                                sequence_num_of_bits + 3,
                                steps=steps,
                                non_uniform=True,
                                ordered=False)
        a = execute(rnn, x, y, sequence_length)

        x, y, _ = generate_data(1,
                                sequence_length,
                                sequence_num_of_bits + 3,
                                steps=steps,
                                non_uniform=True,
                                ordered=True)
        b = execute(rnn, x, y, sequence_length)

        x, y, _ = generate_data(1,
                                sequence_length,
                                sequence_num_of_bits + 3,
# -*- coding: utf-8 -*-
"""
Created on Sat Feb  6 17:21:14 2016

@author: hughsalimbeni
"""

from utils import generate_data
import pickle
import numpy as np
import matplotlib.pyplot as plt

mus = np.reshape((3., 3., -3, 3, 0, -3), (3, 2))
Sigmas = np.reshape((1., 0., 0., 1., 1., 0., 0., 1., 2., 0., 0., 0.5), (3, 2, 2))

pis_ = np.array((2., 1., 1.,))
pis = pis_/np.sum(pis_)

params = (pis, mus, Sigmas)

data_1 = generate_data(50, params)[0]
plt.scatter(data_1[:, 0], data_1[:, 1])

data_2 = generate_data(500, params)[0]
plt.scatter(data_2[:, 0], data_2[:, 1])

pickle.dump((data_1, data_2), open( "data.p", "wb" ))
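
generate_data(N, params) is imported from utils and not shown; given params = (pis, mus, Sigmas), a plausible sketch that samples from the mixture and returns (data, component_labels) is:

import numpy as np

def generate_data(N, params):
    # Draw a mixture component for each point, then sample from that Gaussian.
    # The (data, labels) return format is an assumption based on how [0] is indexed above.
    pis, mus, Sigmas = params
    labels = np.random.choice(len(pis), size=N, p=pis)
    data = np.array([np.random.multivariate_normal(mus[k], Sigmas[k]) for k in labels])
    return data, labels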
Example No. 27
def main():
    parser = argparse.ArgumentParser(description='Generates a 2-dimensional grid dataset.')

    parser.add_argument('data_file', help='The location of the file where the data will be saved.')
    parser.add_argument('weights_file', help='The location of the file where the true prior weights will be saved.')
    parser.add_argument('signals_file', help='The location of the file where the underlying true signals will be saved.')
    parser.add_argument('oracle_file', help='The location of the file where the oracle posteriors will be saved.')
    parser.add_argument('edges_file', help='The location of the file where the grid graph edges will be saved.')
    parser.add_argument('trails_file', help='The location of the file where the trails will be saved.')
    
    parser.add_argument('--verbose', type=int, default=0, help='Print detailed progress information to the console. 0=none, 1=outer-loop only, 2=all details.')
    
    # Grid dimensions
    parser.add_argument('--width', type=int, default=128, help='The width of the 2d grid')
    parser.add_argument('--height', type=int, default=128, help='The height of the 2d grid')
    
    # Signal region settings
    parser.add_argument('--region_min_x', nargs='+', type=int, default=[10, 40], help='The min x locations at which the signal weight changes.')
    parser.add_argument('--region_max_x', nargs='+', type=int, default=[25, 50], help='The max x locations at which the signal weight changes.')
    parser.add_argument('--region_min_y', nargs='+', type=int, default=[10, 50], help='The min y locations at which the signal weight changes.')
    parser.add_argument('--region_max_y', nargs='+', type=int, default=[25, 60], help='The max y locations at which the signal weight changes.')
    parser.add_argument('--region_weights', nargs='+', type=float, default=[0.5, 0.8], help='The value of the signal weight for every region.')
    parser.add_argument('--default_weight', type=float, default=0.05, help='The default signal weight for any areas not in the specified regions.')
    
    # Distribution settings
    parser.add_argument('--null_mean', type=float, default=0., help='The mean of the null distribution.')
    parser.add_argument('--null_stdev', type=float, default=1., help='The standard deviation of the null distribution.')
    parser.add_argument('--signal_mean', type=float, default=0., help='The mean of the signal distribution.')
    parser.add_argument('--signal_stdev', type=float, default=3., help='The standard deviation of the signal distribution.')
    parser.add_argument('--signal_dist_name', help='The name of the signal distribution. This will dynamically call it by name. It must be in the signal_distributions.py file and have both the foo_pdf and foo_sample functions defined.')

    # Plot results
    parser.add_argument('--plot', help='Plot the resulting data and save to the specified file.')

    # Get the arguments from the command line
    args = parser.parse_args()

    if args.verbose:
        print 'Generating data and saving to {0}'.format(args.data_file)

    # Get the form of the signal distribution
    if args.signal_dist_name:
        signal_pdf = getattr(signal_distributions, '{0}_pdf'.format(args.signal_dist_name))
        noisy_signal_pdf = getattr(signal_distributions, '{0}_noisy_pdf'.format(args.signal_dist_name))
        signal_sample = getattr(signal_distributions, '{0}_sample'.format(args.signal_dist_name))
        signal_dist = ProxyDistribution(args.signal_dist_name, signal_pdf, signal_sample)
    else:
        signal_dist = GaussianKnown(args.signal_mean, args.signal_stdev)
        noisy_signal_pdf = signal_dist.noisy_pdf

    signal_weights = calculate_signal_weights(args.width, args.height,
                                                  args.default_weight,
                                                  args.region_min_x, args.region_max_x,
                                                  args.region_min_y, args.region_max_y,
                                                  args.region_weights)

    # Create the synthetic dataset
    data, signals = generate_data(args.null_mean, args.null_stdev, signal_dist, signal_weights)

    # Save the dataset to file
    np.savetxt(args.data_file, data, delimiter=',', fmt='%f')

    # Save the dataset to file
    np.savetxt(args.weights_file, signal_weights, delimiter=',', fmt='%f')

    # Save the truth to file
    np.savetxt(args.signals_file, signals, delimiter=',', fmt='%d')

    # Save the oracle posteriors to file
    oracle_signal_weight = signal_weights * noisy_signal_pdf(data)
    oracle_null_weight = (1-signal_weights) * norm.pdf(data, loc=args.null_mean, scale=args.null_stdev)
    oracle_posteriors = oracle_signal_weight / (oracle_signal_weight + oracle_null_weight)
    np.savetxt(args.oracle_file, oracle_posteriors, delimiter=',', fmt='%f')

    # Save the edges to file
    indices = np.arange(args.width * args.height).reshape((args.width, args.height))
    edges = np.array(list(zip(indices[:, :-1].flatten(), indices[:, 1:].flatten())) +\
                        list(zip(indices[:-1].flatten(), indices[1:].flatten())))
    np.savetxt(args.edges_file, edges, delimiter=',', fmt='%d')

    # Save the trails to file
    trails = np.array(list(indices) + list(indices.T))
    np.savetxt(args.trails_file, trails, delimiter=',', fmt='%d')

    # Plot the data
    if args.plot:
        plot_2d(args.plot, data, weights=None, true_weights=signal_weights)
Example No. 28
adj = sp.hstack([adj, feats]).tolil()
if dataset in ['protein', 'cora', 'citeseer', 'pubmed']:
    train = sp.hstack([train, feats]).tolil()
print ae.summary()

# Specify some hyperparameters
epochs = 50
train_batch_size = 8
val_batch_size = 256

print('\nFitting autoencoder model...\n')
dummy = np.empty(shape=(adj.shape[0], 1))
y_true = dummy.copy()
mask = dummy.copy()

train_data = generate_data(adj, train, feats, y_true, mask, shuffle=True)
batch_data = batch_data(train_data, train_batch_size)
num_iters_per_train_epoch = adj.shape[0] / train_batch_size
for e in xrange(epochs):
    print('\nEpoch {:d}/{:d}'.format(e + 1, epochs))
    print('Learning rate: {:6f}'.format(K.eval(ae.optimizer.lr)))
    curr_iter = 0
    train_loss = []
    for batch_adj, batch_train, batch_f, dummy_y, dummy_m in batch_data:
        # Each iteration/loop is a batch of train_batch_size samples
        if dataset in ['conflict', 'metabolic']:
            batch_adj = StandardScaler().fit_transform(batch_adj)
            res = ae.train_on_batch([batch_adj], [batch_train, batch_f])
        else:
            res = ae.train_on_batch([batch_adj], [batch_train])
        train_loss.append(res)
Example No. 29
from keras.layers.core import Dense, Flatten

from utils import generate_data


def mlp(nhidden=5):
    mdl = Sequential()
    mdl.add(Dense(nhidden, input_shape=(2,), activation='tanh'))
    mdl.add(Dense(1, activation='tanh'))
    mdl.compile(loss='binary_crossentropy', optimizer='adam')
    return mdl

if __name__ == '__main__':

    mdl = mlp(nhidden=25)
    X, y = generate_data()

    every = 100

    V = [0., 0.25, 0.5, 0.75, 1.]
    xm, ym = np.meshgrid(np.linspace(-1,1,200), np.linspace(-1,1,200))
    Xm = np.stack([xm.ravel(), ym.ravel()])

    loss = np.zeros(int(1e5))  # np.zeros needs an integer size
    for j in range(loss.size):

        loss[j] = mdl.train_on_batch(X.T, y)

        if j % every == 0:
            yhat = mdl.predict(Xm.T)
            plt.contourf(xm, ym, yhat.reshape(200,200), V, cmap='RdBu')
def run(file_name, n_samples, p_n, q_n, activation = 'relu', cifar=False, tinyimagenet=False):
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    keras_model = load_model(file_name, custom_objects={'fn':fn, 'tf':tf})
    if tinyimagenet:
        model = CNNModel(keras_model, inp_shape = (64,64,3))
    elif cifar:
        model = CNNModel(keras_model, inp_shape = (32,32,3))
    else:
        model = CNNModel(keras_model)

    #Set correct linear_bounds function
    global linear_bounds
    if activation == 'relu':
        linear_bounds = relu_linear_bounds
    elif activation == 'ada':
        linear_bounds = ada_linear_bounds
    elif activation == 'sigmoid':
        linear_bounds = sigmoid_linear_bounds
    elif activation == 'tanh':
        linear_bounds = tanh_linear_bounds
    elif activation == 'arctan':
        linear_bounds = atan_linear_bounds
    upper_bound_conv.recompile()
    lower_bound_conv.recompile()
    compute_bounds.recompile()

    if cifar:
        inputs, targets, true_labels, true_ids, img_info = generate_data(CIFAR(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    elif tinyimagenet:
        inputs, targets, true_labels, true_ids, img_info = generate_data(tinyImagenet(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    else:
        inputs, targets, true_labels, true_ids, img_info = generate_data(MNIST(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0)
    #0b01111 <- all
    #0b0010 <- random
    #0b0001 <- top2
    #0b0100 <- least

    steps = 15
    eps_0 = 0.05
    summation = 0
    warmup(model, inputs[0].astype(np.float32), eps_0, p_n, find_output_bounds)
        
    start_time = time.time()
    for i in range(len(inputs)):
        print('--- CNN-Cert: Computing eps for input image ' + str(i)+ '---')
        predict_label = np.argmax(true_labels[i])
        target_label = np.argmax(targets[i])
        weights = model.weights[:-1]
        biases = model.biases[:-1]
        shapes = model.shapes[:-1]
        W, b, s = model.weights[-1], model.biases[-1], model.shapes[-1]
        last_weight = (W[predict_label,:,:,:]-W[target_label,:,:,:]).reshape([1]+list(W.shape[1:]))
        weights.append(last_weight)
        biases.append(np.asarray([b[predict_label]-b[target_label]]))
        shapes.append((1,1,1))

        #Perform binary search
        log_eps = np.log(eps_0)
        log_eps_min = -np.inf
        log_eps_max = np.inf
        for j in range(steps):
            LB, UB = find_output_bounds(weights, biases, shapes, model.pads, model.strides, inputs[i].astype(np.float32), np.exp(log_eps), p_n)
            print("Step {}, eps = {:.5f}, {:.6s} <= f_c - f_t <= {:.6s}".format(j,np.exp(log_eps),str(np.squeeze(LB)),str(np.squeeze(UB))))
            if LB > 0: #Increase eps
                log_eps_min = log_eps
                log_eps = np.minimum(log_eps+1, (log_eps_max+log_eps_min)/2)
            else: #Decrease eps
                log_eps_max = log_eps
                log_eps = np.maximum(log_eps-1, (log_eps_max+log_eps_min)/2)
        
        if p_n == 105:
            str_p_n = 'i'
        else:
            str_p_n = str(p_n)
        
        print("[L1] method = CNN-Cert-{}, model = {}, image no = {}, true_id = {}, target_label = {}, true_label = {}, norm = {}, robustness = {:.5f}".format(activation,file_name, i, true_ids[i],target_label,predict_label,str_p_n,np.exp(log_eps_min)))
        summation += np.exp(log_eps_min)
    K.clear_session()
    
    eps_avg = summation/len(inputs)
    total_time = (time.time()-start_time)/len(inputs)
    print("[L0] method = CNN-Cert-{}, model = {}, total images = {}, norm = {}, avg robustness = {:.5f}, avg runtime = {:.2f}".format(activation,file_name,len(inputs),str_p_n,eps_avg,total_time))
    return eps_avg, total_time
# Model parameters
tf.flags.DEFINE_integer("embedding_dim", 4800, "The dimension of the embeddings")

# Testing parameters
tf.flags.DEFINE_string("checkpoint_dir", "./runs/1528468039/checkpoints", "Checkpoint directory from training run")
tf.flags.DEFINE_string("output_file", "./output.csv", "Csv file containing the results")
tf.flags.DEFINE_boolean("has_labels", False, "if has_labels => compute accuracy, if not dump output in file")

FLAGS = tf.flags.FLAGS

# load testing embeddings
all_testing_embeddings = utils.load_embeddings(FLAGS.testing_embeddings_dir,
                                               FLAGS.embedding_dim)

# generate data
test_stories, test_true_endings, test_wrong_endings = utils.generate_data(all_testing_embeddings)
test_stories = np.concatenate((test_stories, test_stories),  axis=0)
test_endings = np.concatenate((test_true_endings, test_wrong_endings), axis=0)

# construct test input
test_labels = [1] * len(test_true_endings) + [0] * len(test_wrong_endings)

## EVALUATION ##

checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
                      allow_soft_placement=True,
                      log_device_placement=False)
    sess = tf.Session(config=session_conf)
Example No. 32
from boto3.dynamodb.types import TypeDeserializer
from pyperf import Runner

from utils import generate_data


data = generate_data()


def deserialize_aiodynamo():
    result = [
        {k: TypeDeserializer().deserialize(v) for k, v in item.items()} for item in data
    ]


Runner().bench_func("deserialize", deserialize_aiodynamo)
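
generate_data is assumed to return a list of DynamoDB-typed items, i.e. the wire format that boto3's TypeDeserializer expects; a minimal sketch:

def generate_data(count=1000):
    # Each attribute value uses the DynamoDB type envelope, e.g. {"S": ...}, {"N": ...}, {"BOOL": ...}.
    return [
        {
            "pk": {"S": f"item-{i}"},
            "count": {"N": str(i)},
            "active": {"BOOL": i % 2 == 0},
        }
        for i in range(count)
    ]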
Example No. 33
import numpy as np
import utils as utils

# generating a visualisable 2d spiral data set
X, y = utils.generate_data()

X_train = X[:300]
X_train = [np.reshape(x, (2, 1)) for x in X_train]
X_test = X[200:]
X_test = [np.reshape(x, (2, 1)) for x in X_test]

# a future implementation will split the data into train and test sets here.
Y_train = y[:300]
Y_train = [np.reshape(utils.num_to_list(z), (3, 1)) for z in Y_train]
Y_test = y[200:]
Y_test = [np.reshape(utils.num_to_list(z), (3, 1)) for z in Y_test]

# preparing the data
train_data = list(zip(X_train, Y_train))
test_data = list(zip(X_test, Y_test))
utils.visualise(X, y)

# training the example net.
example_net = utils.initialize_new()
utils.train_ex(example_net, train_data, 300, 1, 10)
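
utils.num_to_list turns a class label into something reshapeable to (3, 1), i.e. a 3-way one-hot vector; a hedged sketch:

def num_to_list(label, num_classes=3):
    # One-hot encode an integer class label as a plain Python list.
    encoding = [0.0] * num_classes
    encoding[int(label)] = 1.0
    return encoding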
Example No. 34
import matplotlib.pyplot as plt
plt.ion()

from utils import generate_data, get_context


# DEBUGGING

from theano import ProfileMode
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
data = generate_data(200)

# use the predefined binary-binary RBM, which has visible units (rbm.v), hidden units (rbm.h),
# a weight matrix W connecting them (rbm.W), and visible and hidden biases (rbm.bv and rbm.bh).
n_visible = data.shape[1]
n_hidden = 100

rbm = rbms.GaussianBinaryRBM(n_visible, n_hidden)

initial_vmap = { rbm.v: T.matrix('v') }

# We use single-step contrastive divergence (CD-1) to train the RBM. For this, we can use
# the CDParamUpdater. This requires symbolic CD-1 statistics:
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

# We create an updater for each parameter variable
Example No. 35

if __name__ == "__main__":

    # norm ball for generate toy data
    norm = np.inf

    # build the network
    net = build_fullyconnected(norm=norm)

    # train (should take ~10s)
    numiter = 20000
    objective = np.array([net() for _ in trange(numiter)])

    # predicted class labels (on held out data)
    X_holdout, y_holdout = generate_data(norm=norm, nsamples=5000)
    yhat = net.predict(X_holdout)[0]

    # plot the training curve
    plt.figure()
    plt.plot(np.arange(numiter), objective)
    plt.xlabel("Iteration ($k$)")
    plt.ylabel("Training error ($f(k)$)")

    # plot labeled training data
    plt.figure()
    plt.scatter(X_holdout[0], X_holdout[1], s=50, c=yhat, cmap="seismic")
    plt.gca().set_aspect("equal")
    plt.xlim(-1, 1)
    plt.ylim(-1, 1)
Example No. 36
TEST_PATH = '../data/test.json'
WEIGHT_SAVE_PATH = '../model_weights.hdf5'

BATCH_SIZE = 32
EPOCHS = 100  # Increase this

train_data = pd.read_json(TRAIN_PATH)
train_data['inc_angle'] = train_data['inc_angle'].replace('na', 0)
train_data['inc_angle'] = train_data['inc_angle'].astype(float).fillna(0.0)

if TEST:
    SEED = np.random.randint(9999)
else:
    SEED = 42  # Constant seed for comparability between runs

X = generate_data(train_data)
y = train_data['is_iceberg']

X_train, X_val, y_train, y_val = train_test_split(X,
                                                  y,
                                                  train_size=.8,
                                                  random_state=SEED)
callback_list = get_callbacks(WEIGHT_SAVE_PATH, 20)

model = build_model()
start_time = time.time()

if USE_AUGMENTATION:
    image_augmentation = ImageDataGenerator(rotation_range=20,
                                            horizontal_flip=True,
                                            vertical_flip=True,
Example No. 37
                    action='store_true', default=False,
                    help='Don\'t train the neural network')
args = parser.parse_args()
if args.loss == 'MSE':
    ONE_HOT = True
    loss = ff.MSELoss()
elif args.loss == 'CrossEntropy':
    ONE_HOT = False
    loss = ff.CrossEntropyLoss()
else:
    raise ValueError('Unknown loss.')

DATASET_SIZE = 1000

## Generate dataset
train_input, train_target, test_input, test_target, test_input_raw = generate_data(DATASET_SIZE, one_hot=ONE_HOT, normalize=True)

## Create model
model = Net(nb_nodes = 25)
print(model)
if args.no_train:
    ## Load best model
    model.load('../model/best-model.pt')
    model.eval()  # Set model to eval mode
    ## Ploting results of best model
    plot_prediction(test_input, test_input_raw, test_target, model)
    plt.suptitle('Prediction of the best model')
    plt.show()
else:
    print('Using : {}Loss\n'.format(args.loss))
    ## Training model
Example No. 38
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from config import logging
import utils
import config

class LoadData:
    def __init__(self,aim = 'train'):
        self.aim = aim
    def load_data(self):
        logging.info('Loading order data...')
        df_order_data = utils.generate_data(column_names=config.order_data_names,
                                            aim='train',
                                            table_name='order_data')
        logging.info('Order data loaded.')
        logging.info('Loading POI data...')
        # presumably meant to be its own frame rather than overwriting df_order_data
        df_poi_data = utils.generate_data(column_names=config.poi_data_names,
                                          aim='train',
                                          table_name='poi_data')
        logging.info('POI data loaded.')
        logging.info('Loading road congestion data...')
        df_traffic_data = utils.generate_data(column_names=config.traffic_data_names,
                                              aim='train',
                                              table_name='traffic_data')
        logging.info('Road congestion data loaded.')
        logging.info('Loading weather data...')
        df_weather_data = utils.generate_data(column_names=config.weather_data_names,
                                              aim='train',
Example No. 39
from utils import generate_data

generate_data()
Example No. 40
                        "Use training dataset for training")
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS

# Prepare the data
print("Load training and validation embeddings \n")

# load training embeddings and generate training data
if FLAGS.use_training_dataset:
    all_training_embeddings = utils.load_embeddings(
        FLAGS.training_embeddings_dir, FLAGS.embedding_dim)
    training_stories, training_true_endings, training_wrong_endings = utils.generate_data(
        all_training_embeddings, FLAGS.training_negative_sampling_file)

    print("len(training_true_endings), len(training_wrong_endings)",
          len(training_true_endings), len(training_wrong_endings))
    training_stories = np.concatenate((training_stories, training_stories),
                                      axis=0)
    training_endings = np.concatenate(
        (training_true_endings, training_wrong_endings), axis=0)
    training_labels = [1] * len(training_true_endings) + [0] * len(
        training_wrong_endings)

    training_true_endings = []
    training_wrong_endings = []

# load validation embeddings and generate validation data
all_validation_embeddings = utils.load_embeddings(
Example No. 41
if os.path.isfile(model_weights_name):
    # Loading old model, will continue training from saved point
    print ("Loading old model...")
    model.load_weights(model_weights_name)
else:
    print ("Could not find weights, starting from scratch")


model.compile(loss='mean_squared_error',
               optimizer='adam')

# Let's see the how the output changes as the model trains
class training_monitor(Callback):
    def __init__(self):
        self.epoch = 0

    def on_epoch_end(self, epoch, logs={}):
        cur_img = model.predict(X)
        save_ndarray(args.model_output_root + "_image_epoch_" + str(self.epoch) + ".jpg", cur_img, args.pixels, args.pixels)
        model.save_weights(args.model_output_root + "_facepaint_model_epoch_" + str(self.epoch) + ".h5", overwrite=True)
        self.epoch = self.epoch + 1

image_progress_monitor = training_monitor()
#model.fit(X, Y, nb_epoch = args.epochs, batch_size = args.batch_size, callbacks=[image_progress_monitor], shuffle=True)
model.fit_generator(generator=generate_data(X,Y,mask_matrix,args.batch_size,image_size=args.pixels), steps_per_epoch=1000, epochs=args.epochs, callbacks=[image_progress_monitor])
# Save final (best?) model
model.save_weights(model_weights_name)

learnt_image = model.predict(X)
save_ndarray(args.model_output_root + "_final_image.jpg", learnt_image, args.pixels, args.pixels)
Example No. 42
        self.train_len=0
        self.test_len = 0
        self.valid_len = 0
        self.mode="test"

        ## Transformer parameters
        self.dropout=0.5
        self.max_len=5000
        self.nhead=2


# data_path="E:/study_series/2020_3/re_write_classify/data/"
# data_path="/mnt/data3/wuchunsheng/code/nlper/NLP_task/text_classification/my_classification_cnews/2020_3_30/text_classify/data/"

config = Config()
train_iter, valid_iter, test_iter, TEXT = generate_data(config)

#model = RNNModel(config, TEXT).to(config.device)
model=TransformerModel(config, TEXT).to(config.device)

model =load_model(config, model)

#sen="目"*50
sen="体育快讯"
#sen="".join(['c', 'o', 'n', 't', 'e', 'x', 't', ',', 'l', 'a', 'b', 'e', 'l'])
#res=test_sentence(config, model ,TEXT, sen)
#print(sen)
#print(res)
#res=test(config,model,TEXT,  test_iter)
#print(res)
print("=========================")
Example No. 43
File: train.py Project: xc35/tanda
#####################################################################

if __name__ == '__main__':

    # Create log path: Create at this level (or one above, in launch script)
    # so that we use same one for both steps
    log_path = FLAGS.log_path if FLAGS.log_path is not None else \
        get_log_dir_path(FLAGS.log_root, FLAGS.run_name)

    # Note that the flags in this file control the dataset size, not the
    # normal flags in train_scripts.py!
    dims = [FLAGS.synthetic_dim]
    if FLAGS.subsample_seed > 0:
        np.random.seed(FLAGS.subsample_seed)
    X = generate_data(FLAGS.synthetic_n,
                      d=FLAGS.synthetic_dim,
                      r=FLAGS.synthetic_r)

    # For testing, also include a discriminator which is perfectly correct
    if FLAGS.oracle_disc:
        d_class = OracleDiscriminator
    else:
        d_class = SimpleDiscriminator

    ###
    ### STEP 1: TRAIN TAN
    ###
    if FLAGS.is_test:
        print("STEP 1: Training TAN")

    train_tan(X,