Example #1
def main():
    # ~ Fetch arguments and logger ~
    args = get_args()
    logger = get_logger(logpath=os.path.join('./hh.log'),
                        filepath=os.path.abspath(__file__))

    # ~ Set GPU/CPU if desired ~
    if (args.use_gpu and torch.cuda.is_available()):
        device = torch.device('cuda:' + str(args.gpu))
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = torch.device('cpu')
        torch.set_default_tensor_type(torch.FloatTensor)

    # ~ Fetch and Split Data ~
    data.generate_data()
    dataset = pickle.load(open("./hh.pkl", "rb"))
    if (args.shuffle_data):
        random.shuffle(dataset)
    train = dataset[:int(args.training_split * len(dataset))]
    test = dataset[int(args.training_split * len(dataset)):]

    # ~ Instantiate Model ~
    Tp = bivariate_poly(degree=args.degree).to(device)  # Kinetic
    Vq = bivariate_poly(degree=args.degree).to(device)  # Potential
    _Tp = gradient_wrapper(Tp)
    _Vq = gradient_wrapper(Vq)
    model = SSINN(_Tp, _Vq, fourth_order, tol=args.tol).to(device)

    # ~ Train Model ~
    train_model(model, train, test, args, device, logger)

    # ~ Save Model ~
    torch.save({'state_dict': model.state_dict()}, './hh_model.pth')
Example #2
def gen_bound(n_train, n_test, base_param, k):
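    # Train: input values drawn uniformly in [-k, +k]; the single gate ticks
    # with ~1% probability per step. Test: a smoothed U(-1, 1) signal, with
    # generation seeded by the last training output.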
    n_gate = 1
    values = np.random.uniform(-k, +k, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)

    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])

    return base_param, train_data, test_data
Example #3
def main():
    print('--start--')

    # Folder Paths
    log_dir = './files/training_logs/'

    # Hyper parameters
    num_features = 5
    classes = ['Dead', 'Alive: Wrong Direction', 'Alive: Right Direction']
    num_classes = len(classes)

    epochs = 10
    batch_size = 128
    learning_rate = 0.01

    # Load Data
    x_train, y_train = Data.generate_data(80000, num_features, num_classes)
    x_valid, y_valid = Data.generate_data(16000, num_features, num_classes)

    # Build model
    model = Model.build_model(num_features, num_classes, learning_rate)

    # View model summary
    model.summary()

    # Check memory needed during the training process (not accurate)
    Model.get_model_memory_usage(batch_size, model)

    # Get optimizer name
    opt_name = model.optimizer.__class__.__name__
    # Get folder name
    hparam_str = make_hparam_string(opt_name, learning_rate, batch_size, epochs)
    log_dir += hparam_str
    output_dir = log_dir + 'model/'
    # Create folder
    prepare_dir(output_dir)

    # Train the model
    train(model, x_train, y_train, x_valid, y_valid, batch_size, epochs, log_dir)

    # Evaluate the model
    evaluate(model, classes, x_valid, y_valid, output_dir)

    # Save the model
    Model.save_model(model, classes, output_dir)

    # Test on game
    # test_in_game(model, 1000, False, True, 200)

    # Visualize
    # plt.show()

    print('--end--')
Example #4
def gen_discrete(n_train, n_test, base_param, k):
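    # Values at tick positions are overwritten with one of k fixed discrete
    # levels drawn once from U(-1, 1); gates still tick with ~1% probability.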
    n_gate = 1
    values = np.random.uniform(-1, +1, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    discrete_values = np.random.uniform(-1, 1, k)
    idx = np.where(ticks == 1)[0]
    values[idx] = np.random.choice(discrete_values, len(idx))
    train_data = generate_data(values, ticks)

    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    return base_param, train_data, test_data
Example #5
def gen_gate(n_train, n_test, base_param, k):
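    # Uses k gate channels instead of one; the model's input shape is widened
    # below to 1 value input plus n_gate gate inputs.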
    n_gate = k
    values = np.random.uniform(-1, +1, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)

    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])

    param = base_param.copy()
    param["shape"] = (1 + n_gate, param["shape"][1], n_gate)

    return param, train_data, test_data
Example #6
def gen_trigger(n_train, n_test, base_param, k):
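    # Training ticks fire at cumulative random intervals of 1..k steps instead
    # of independently per step; test ticks revert to the ~1% Bernoulli scheme.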
    n_gate = 1
    values = np.random.uniform(-1, +1, n_train)
    ticks_interval = np.random.randint(1, k + 1, size=(n_train))
    ticks_time = np.cumsum(ticks_interval)
    i_max = np.max(np.where(ticks_time < n_train)[0])
    ticks = np.zeros((n_train, ))
    ticks[ticks_time[:i_max]] = 1
    train_data = generate_data(values, ticks)

    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])

    return base_param, train_data, test_data
Example #7
    def _update_baseline(self, model, epoch):

        # Load or copy baseline model based on self.from_checkpoint condition
        if self.from_checkpoint and self.alpha == 0:
            print('Baseline model loaded')
            self.model = load_model(self.path_to_checkpoint,
                                    embed_dim=self.embed_dim,
                                    n_customer=self.n_customer)
        else:
            print('Baseline model copied')
            self.model = copy_model(model,
                                    embed_dim=self.embed_dim,
                                    n_customer=self.n_customer)
            # For checkpoint
            self.model.save_weights('%s%s_baseline_epoch%s.h5' %
                                    (self.weight_dir, self.task, epoch),
                                    save_format='h5')
        # We generate a new dataset for baseline model on each baseline update to prevent possible overfitting
        self.dataset = generate_data(n_samples=self.n_rollout_samples,
                                     n_customer=self.n_customer)

        print(
            f'Evaluating baseline model on baseline dataset (epoch = {epoch})')
        self.bl_vals = rollout(self.model, self.dataset)
        self.mean = tf.reduce_mean(self.bl_vals)
        self.cur_epoch = epoch
Example #8
def train_selected_model(activation,
                         learning_rate,
                         momentum,
                         n_points,
                         n_epochs,
                         batch_size,
                         plot_points=False):
    train_data, test_data = data.generate_data(n_points)

    model = train.build_model(activation)
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          momentum=momentum)
    criterion = framework.MSELoss()

    t0 = time.perf_counter()
    history = train.train_model(model, optimizer, criterion, train_data,
                                test_data, n_epochs, batch_size)
    t1 = time.perf_counter()

    result = {
        'train_loss': train.compute_loss(model, criterion, train_data,
                                         batch_size),
        'train_err': train.compute_error(model, train_data, batch_size) * 100,
        'test_loss': train.compute_loss(model, criterion, test_data,
                                        batch_size),
        'test_err': train.compute_error(model, test_data, batch_size) * 100,
        'time': t1 - t0
    }

    if plot_points:
        plot.plot_points(test_data, train_data, model, plot_points)

    return history, result
Example #9
def add_contract(source, destination, provider, payload, amount, signedContract):
    encode_data = (
        source.encode()
        + destination.encode()
        + provider.encode()
        + payload.encode()
        + amount.encode()
    )
    # TEST
    # if not verify_sign(provider, encode_data, signedContract):
    #    return None
    # else:
    ######new_contract = Contract(str(time.time()), source, destination, provider, payload, amount)
    new_contract = Contract(str(123), source, destination, provider, payload, amount)
    token_ledger[source] = str(int(token_ledger[source]) - int(new_contract.stake))
    token_ledger[destination] = str(
        int(token_ledger[destination]) - int(new_contract.stake)
    )
    active_contract_list.append(new_contract.serialize())

    block_data = generate_data(
        new_contract.serialize(), None, token_ledger, active_contract_list
    )
    node_chain_instance.add_block(block_data)

    """
    for i in range(len(node_chain_instance.block_data)):
        print(str(node_chain_instance.block_data[i]))
    #print((json.loads(node_chain_instance.block_data[-1].data)).get('ledger'))
    print((json.loads(node_chain_instance.block_data[-1].data)))
    """

    return block_data
Example #10
def roc_curves(n_samples, p_value=False):
    """
    """
    data = generate_data(n_samples=n_samples)
    X, Y = data[:, :-1], data[:, -1]
    if p_value:
        I = np.abs(0.5 - compute_values(X))
    else:
        I = mutual_information(X, Y)
    tau_min, tau_max = I.min(), I.max()
    if not p_value and tau_min < 0:
        tau_min = 0
        print "warning - tau_min < 0"

    step = 500
    erange = [tau_min + i * (tau_max - tau_min) * 1. / step
              for i
              in range(step)]

    roc_x = []
    roc_y = []
    for tau in erange:
        pos = I > tau
        tp = pos[1000:].sum()
        tn = (1 - pos[:1000]).sum()
        roc_x.append(float(tp) / (200))
        roc_y.append(1 - float(tn) / (1000))
    return np.array(roc_x), np.array(roc_y), np.array(erange)
Example #11
def gen_value(n_train, n_test, base_param, k):
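    # Uses k value channels with a single gate; the model's input shape is
    # widened below to k + n_gate.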
    n_gate = 1
    values = np.random.uniform(-1, +1, (n_train, k))
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)

    values = np.empty((n_test, k))
    for i in range(k):
        values[:, i] = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])

    param = base_param.copy()
    param["shape"] = (k + n_gate, param["shape"][1], n_gate)

    return param, train_data, test_data
Example #12
def add_transaction(source, destination, provider, payload, amount):

    new_trans = Transaction(
        str(time.time()), source, destination, provider, payload, amount
    )
    token_ledger[source] = str(int(token_ledger[source]) + int(new_trans.amount))
    token_ledger[destination] = str(
        int(token_ledger[destination]) + int(new_trans.amount)
    )

    block_data = generate_data(
        None, new_trans.serialize(), token_ledger, active_contract_list
    )
    node_chain_instance.add_block(block_data)

    transaction = (json.loads(node_chain_instance.block_data[-1].data)).get("transactions")
    if transaction is not None:
        transaction_list.append(transaction)
    print(transaction_list)

    """
    for i in range(len(node_chain_instance.block_data)):
        print(str(node_chain_instance.block_data[i]))
    print((json.loads(node_chain_instance.block_data[-1].data)))
    """

    return block_data
Example #13
def n_mutual_information(n, n_samples):
    nI = np.zeros((n, 1200))
    for i in range(n):
        data = generate_data(n_samples=n_samples)
        X, Y = data[:, :-1], data[:, -1]
        nI[i, :] = mutual_information(X, Y)

    return nI
Example #14
def test_compute_coefficients(size, mean1, mean2, num_threads):
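    # The pure-Python and C++ implementations should agree on all three fitted
    # coefficients to within 0.01.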
    X, Y = generate_data(size, mean1, mean2)
    lrpy = LogisticRegressionPy()
    lrpy_coef = lrpy.compute_coefficients(X, Y)
    lrcpp = LogisticRegressionCpp(num_threads=num_threads)
    lrcpp_coef = lrcpp.compute_coefficients(X, Y)
    assert abs(lrpy_coef[0] - lrcpp_coef[0]) < 0.01
    assert abs(lrpy_coef[1] - lrcpp_coef[1]) < 0.01
    assert abs(lrpy_coef[2] - lrcpp_coef[2]) < 0.01
Example #15
def main(args):
    x_train, y_train, x_test, y_test = generate_data(args.samples,
                                                     args.seq_len,
                                                     args.seq_dim)
    model = train(x_train, y_train, args)

    y_pred = evaluate_model(model, x_train, y_train, args)
    y_pred = format_predictions(y_pred, args.seq_len, args.seq_dim)

    plot_predictions(x_train, y_train, y_pred)
Example #16
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from hd5 file
		https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
	"""
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    model_loaded = AttentionModel(embed_dim, n_encode_layers=n_encode_layers)
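    # Run one forward pass so the subclassed model builds its weights before
    # load_weights is called.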
    for data in (small_dataset.batch(5)):
        _, _ = model_loaded(data, decode_type='greedy')

    model_loaded.load_weights(path)
    return model_loaded
Example #17
def main(args):
    x_train, y_train, x_test, y_test = generate_data(args.samples,
                                                     args.seq_len)

    model = train(x_train, y_train, args)
    y_pred = evaluate_model(model, x_train, args)

    print(y_pred)
    print(y_train)

    plot_predictions(x_train, y_train, y_pred)
Example #18
def estimate(T, N, sigmas, coeff, generate_data, estimators):

    y, X = generate_data(T, N, sigmas, coeff)
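    # Fit every estimator on the same simulated draw and collect its
    # (beta, std_error) pair.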

    betas_error = []

    for estimator_fn in estimators:
        beta, std_error = estimator_fn(y, X)
        betas_error.append((beta, std_error))

    return betas_error
Example #19
def main(targets):
    data_config = json.load(open('config/data-params.json'))
    main_model_config = json.load(open('config/main-model-params.json'))
    
    if 'test' in targets:
        dataset = generate_data(**data_config)
        save_data(dataset, **data_config)
        first_baseline_rsme = first_base()
        knn_baseline_rsme = knn_base()
        main_rsme = build_model(dataset, **main_model_config)
        print('Main RMSE: ',main_rsme,'First baseline RMSE: ', first_baseline_rsme, 
              'KNN baseline RMSE: ', knn_baseline_rsme)
Example #20
def create_data(
    train_size: int,
    dev_size: int,
    n_parts: int,
    train_part_range: Tuple[int, int],
    dev_part_range: Tuple[int, int],
) -> Dict:
    circuit_gen = CircuitGenerator(n_parts)

    data = generate_data(circuit_gen, train_size, train_part_range, dev_size,
                         dev_part_range)

    return data, circuit_gen
Example #21
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to new model
		https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
	"""
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    new_model = AttentionModel(embed_dim)
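    # Run one forward pass so the new subclassed model builds its weights
    # before they are overwritten below.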
    for data in (small_dataset.batch(5)):
        # _, _ = model(data, decode_type = 'sampling')
        cost, _ = new_model(data, decode_type='sampling')

    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copies the weight variables of model (b) into new_model (a)
    return new_model
Example #22
def compute_ratios(tau, p_value=False):
    data = generate_data(n_samples=1000)
    X, Y = data[:, :-1], data[:, -1]
    if p_value:
        I = np.abs(0.5 - compute_values(X))
    else:
        I = mutual_information(X, Y)
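    # Presumably the first 1000 feature scores belong to independent (null)
    # features and the rest to dependent ones, hence the index split below.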
    pos = I > tau
    fp = pos[:1000].sum()
    fn = (1 - pos[1000:]).sum()
    tp = pos[1000:].sum()
    tn = (1 - pos[:1000]).sum()

    return fp, fn, tp, tn
Example #23
def main():
    args = config.parse_command_line_arguments()

    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(
        args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }

    mdl = model.create_model(args, inputs_train, targets_train)

    train_model(args, mdl, results)
Example #24
def train(cfg, log_path = None):
	
	model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, 
						cfg.tanh_clipping, 'sampling')
	baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples, 
								cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
	optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
	ave_loss = tf.keras.metrics.Mean()
	ave_L = tf.keras.metrics.Mean()
	
	for epoch in tqdm(range(cfg.epochs), desc = 'epoch'):
		t1 = time()
		dataset = generate_data(cfg.n_samples, cfg.n_customer)
		bs = baseline.eval_all(dataset)
		bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None # bs: (cfg.batch_steps, cfg.batch) or None
		
		for t, inputs in enumerate(dataset.batch(cfg.batch)):
			with tf.GradientTape() as tape:
				L, logp = model(inputs)
				b = bs[t] if bs is not None else baseline.eval(inputs, L)
				b = tf.stop_gradient(b)
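				# REINFORCE-style loss: the advantage (L - b) weights the log-probability of the sampled tour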
				loss = tf.reduce_mean((L - b) * logp)
				L_mean = tf.reduce_mean(L)
			grads = tape.gradient(loss, model.trainable_weights)  # model.trainable_weights == theta
			grads, _ = tf.clip_by_global_norm(grads, 1.0)
			optimizer.apply_gradients(zip(grads, model.trainable_weights))# optimizer.step

			ave_loss.update_state(loss)
			ave_L.update_state(L_mean)
			if t%(cfg.batch_steps*0.1) == 0:
				print('epoch%d, %d/%dsamples: loss %1.2f, average L %1.2f, average b %1.2f\n'%(
						epoch, t*cfg.batch, cfg.n_samples, ave_loss.result().numpy(), ave_L.result().numpy(), tf.reduce_mean(b)))

		baseline.epoch_callback(model, epoch)
		model.decode_type = 'sampling'
		model.save_weights('%s%s_epoch%s.h5'%(cfg.weight_dir, cfg.task, epoch), save_format = 'h5')
		
		if cfg.islogger:
				if log_path is None:
					log_path = '%s%s_%s.csv'%(cfg.log_dir, cfg.task, cfg.dump_date)#cfg.log_dir = ./Csv/
					with open(log_path, 'w') as f:
						f.write('time,epoch,loss,average length\n')
				with open(log_path, 'a') as f:
					t2 = time()
					f.write('%dmin%dsec,%d,%1.2f,%1.2f\n'%((t2-t1)//60, (t2-t1)%60, epoch, ave_loss.result().numpy(), ave_L.result().numpy()))

		ave_loss.reset_states()
		ave_L.reset_states()
Example #25
def main():
    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(
        args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }

    mdl = model.create_model(
        args, inputs_train,
        targets_train)  # Actual Model that is being observed
    mdl_test = model.create_model(
        args, inputs_train,
        targets_train)  # Dummy Model for calculating gradient
    train_model(args, mdl, mdl_test, results)
Example #26
def train_selected_model(activation: ty.Union[framework.Tanh, framework.ReLU],
                         learning_rate: float,
                         momentum: float,
                         n_points: int,
                         n_epochs: int,
                         batch_size: int,
                         track_history: bool = False,
                         plot_points: bool = False):
    """
    Train a miniproject model with a given activation using SGD and MSE loss.

    :param activation: activation function
    :param learning_rate: SGD learning rate
    :param momentum: SGD momentum
    :param n_points: number of points in training and test data
    :param n_epochs: number of epochs
    :param batch_size: batch size
    :param track_history: track training and test error and loss by epoch
    :param plot_points: generate plots visualizing model predictions on the training and test data
    :returns: (history dictionary, final results)
    """
    train_data, test_data = data.generate_data(n_points)

    model = train.build_model(activation)
    optimizer = framework.SGD(model, lr=learning_rate, momentum=momentum)
    criterion = framework.MSELoss(model)

    t0 = time.perf_counter()
    history = train.train_model(model, optimizer, criterion, train_data,
                                test_data, n_epochs, batch_size, track_history)
    t1 = time.perf_counter()

    result = {
        'train_loss': train.compute_loss(model, criterion, train_data,
                                         batch_size),
        'train_err': train.compute_error(model, train_data, batch_size) * 100,
        'test_loss': train.compute_loss(model, criterion, test_data,
                                        batch_size),
        'test_err': train.compute_error(model, test_data, batch_size) * 100,
        'time': t1 - t0
    }

    if plot_points:
        plot.plot_points(test_data, train_data, model, plot_points)

    return history, result
Example #27
    def profile():
        data = generate_data()[-1][1]

        funcs_generated_data = [
            create_data_increasing_depth, create_data_decreasing_depth
        ]

        funcs = [
            outer_flatten_1, outer_flatten_2, niccolum_flatten, tishka_flatten,
            zart_flatten, recursive_flatten_generator,
            recursive_flatten_iterator, tishka_flatten_with_stack
        ]

        for func_generated_data in funcs_generated_data:
            creating_data = func_generated_data(**data)
            for func in funcs:
                list(func(creating_data))
                time.sleep(0.3)
Example #28
def main():
    """
    Demonstrate the NormalDistribution class with a dataset
    created using the data module.
    """

    print("-----------------------")
    print("| codedrome.com       |")
    print("| Normal Distribution |")
    print("-----------------------\n")

    d = data.generate_data()

    nd = normaldistribution.NormalDistribution()

    nd.data = d

    nd.calculate_prob_dist()

    nd.print_prob_dist()
Example #29
    np.random.seed(1)

    # Build memory
    n_gate = 1
    model = generate_model(shape=(1 + n_gate, 1000, n_gate),
                           sparsity=0.5,
                           radius=0.01,
                           scaling=0.25,
                           leak=1.0,
                           noise=0.0001)

    # Training data
    n = 25000
    values = np.random.uniform(-1, +1, n)
    ticks = np.random.uniform(0, 1, (n, n_gate)) < 0.01
    train_data = generate_data(values, ticks)

    error = train_model(model, train_data)
    print("Training error : {0}".format(error))

    # Testing data
    n = 2500
    values = np.cos(np.linspace(0, 20 * np.pi, n))
    ticks = np.zeros(n)
    ticks[::25] = 1
    test_data = generate_data(values, ticks, last=train_data["output"][-1])

    error = test_model(model, test_data)
    print("Testing error : {0}".format(error))

    # Display
Example #30
    fltrs.ewma_adaptive_variance_linear,
    # fltrs.ewma_variance,
    # fltrs.maww,
    # fltrs.des
]

configs = [
    data_trend,
    # data_simple,
    data_complex_trend,
    data_variation,
    # data_reversed_trend,
    data_trend_jump,
]
for config in configs:
    data.append(generate_data(config))

for f in filters:
    index = filters.index(f)
    filtered.append([])
    for x, y in data:
        print "Filtering data using {}".format(f.func_name)
        x_ = f(x)
        y_ = np.range(len(x_)) if type(x_) is not tuple else ([range(len(x_[0]))] * len(x_))
        filtered[index].append((x_, y_))

dim = Subplots.get_dimensions(len(data))
figure, subplots = plt.subplots(*dim, sharex=True, sharey=True)
plots = Subplots(subplots)
plots.grid(True)
Example #31
    def check_training(self, model_type, label_type):
        print('----- ' + model_type + ', ' + label_type + ' -----')
        tf.reset_default_graph()
        with tf.Graph().as_default():
            # Load batch data
            batch_size = 4
            inputs, labels, inputs_seq_len, labels_seq_len = generate_data(
                label_type=label_type,
                model='attention',
                batch_size=batch_size)

            # Define placeholders
            inputs_pl = tf.placeholder(
                tf.float32,
                shape=[batch_size, None, inputs.shape[-1]],
                name='input')

            # `[batch_size, max_time]`
            labels_pl = tf.placeholder(tf.int32,
                                       shape=[None, None],
                                       name='label')

            # These are prepared for computing LER
            indices_true_pl = tf.placeholder(tf.int64, name='indices')
            values_true_pl = tf.placeholder(tf.int32, name='values')
            shape_true_pl = tf.placeholder(tf.int64, name='shape')
            labels_st_true_pl = tf.SparseTensor(indices_true_pl,
                                                values_true_pl, shape_true_pl)
            indices_pred_pl = tf.placeholder(tf.int64, name='indices')
            values_pred_pl = tf.placeholder(tf.int32, name='values')
            shape_pred_pl = tf.placeholder(tf.int64, name='shape')
            labels_st_pred_pl = tf.SparseTensor(indices_pred_pl,
                                                values_pred_pl, shape_pred_pl)
            inputs_seq_len_pl = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='inputs_seq_len')
            labels_seq_len_pl = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='labels_seq_len')
            keep_prob_input_pl = tf.placeholder(tf.float32,
                                                name='keep_prob_input')
            keep_prob_hidden_pl = tf.placeholder(tf.float32,
                                                 name='keep_prob_hidden')

            # Define model graph
            output_size = 26 + 2 if label_type == 'character' else 61 + 2
            # model = load(model_type=model_type)
            network = BLSTMAttetion(batch_size=batch_size,
                                    input_size=inputs[0].shape[1],
                                    encoder_num_unit=256,
                                    encoder_num_layer=2,
                                    attention_dim=128,
                                    decoder_num_unit=256,
                                    decoder_num_layer=1,
                                    embedding_dim=20,
                                    output_size=output_size,
                                    sos_index=output_size - 2,
                                    eos_index=output_size - 1,
                                    max_decode_length=50,
                                    attention_weights_tempareture=1,
                                    logits_tempareture=1,
                                    parameter_init=0.1,
                                    clip_grad=5.0,
                                    clip_activation_encoder=50,
                                    clip_activation_decoder=50,
                                    dropout_ratio_input=1.0,
                                    dropout_ratio_hidden=1.0,
                                    weight_decay=0,
                                    beam_width=0,
                                    time_major=False)

            # Add to the graph each operation
            loss_op, logits, decoder_outputs_train, decoder_outputs_infer = network.compute_loss(
                inputs_pl, labels_pl, inputs_seq_len_pl, labels_seq_len_pl,
                keep_prob_input_pl, keep_prob_hidden_pl)
            learning_rate = 1e-3
            train_op = network.train(loss_op,
                                     optimizer='rmsprop',
                                     learning_rate_init=learning_rate,
                                     is_scheduled=False)
            decode_op_train, decode_op_infer = network.decoder(
                decoder_outputs_train,
                decoder_outputs_infer,
                decode_type='greedy',
                beam_width=1)
            ler_op = network.compute_ler(labels_st_true_pl, labels_st_pred_pl)
            attention_weights = decoder_outputs_infer.attention_scores

            # Add the variable initializer operation
            init_op = tf.global_variables_initializer()

            # Count total parameters
            parameters_dict, total_parameters = count_total_parameters(
                tf.trainable_variables())
            for parameter_name in sorted(parameters_dict.keys()):
                print("%s %d" %
                      (parameter_name, parameters_dict[parameter_name]))
            print("Total %d variables, %s M parameters" %
                  (len(parameters_dict.keys()), "{:,}".format(
                      total_parameters / 1000000)))

            # Make feed dict
            feed_dict = {
                inputs_pl: inputs,
                labels_pl: labels,
                inputs_seq_len_pl: inputs_seq_len,
                labels_seq_len_pl: labels_seq_len,
                keep_prob_input_pl: network.dropout_ratio_input,
                keep_prob_hidden_pl: network.dropout_ratio_hidden,
                network.lr: learning_rate
            }

            with tf.Session() as sess:

                # Initialize parameters
                sess.run(init_op)

                # Wrapper for tfdbg
                # sess = tf_debug.LocalCLIDebugWrapperSession(sess)

                # Train model
                max_steps = 400
                start_time_global = time.time()
                start_time_step = time.time()
                ler_train_pre = 1
                not_improved_count = 0
                for step in range(max_steps):

                    # Compute loss
                    _, loss_train = sess.run([train_op, loss_op],
                                             feed_dict=feed_dict)

                    # Gradient check
                    # grads = sess.run(network.clipped_grads,
                    #                  feed_dict=feed_dict)
                    # for grad in grads:
                    #     print(np.max(grad))

                    if (step + 1) % 10 == 0:
                        # Change to evaluation mode
                        feed_dict[keep_prob_input_pl] = 1.0
                        feed_dict[keep_prob_hidden_pl] = 1.0

                        # Predict class ids
                        predicted_ids_train, predicted_ids_infer = sess.run(
                            [decode_op_train, decode_op_infer],
                            feed_dict=feed_dict)

                        # Compute accuracy
                        feed_dict_ler = {
                            labels_st_true_pl:
                            list2sparsetensor(labels),
                            labels_st_pred_pl:
                            list2sparsetensor(predicted_ids_infer)
                        }
                        ler_train = sess.run(ler_op, feed_dict=feed_dict_ler)

                        duration_step = time.time() - start_time_step
                        print('Step %d: loss = %.3f / ler = %.4f (%.3f sec)' %
                              (step + 1, loss_train, ler_train, duration_step))
                        start_time_step = time.time()

                        # Visualize
                        if label_type == 'character':
                            print('True            : %s' %
                                  num2alpha(labels[0]))
                            print('Pred (Training) : <%s' %
                                  num2alpha(predicted_ids_train[0]))
                            print('Pred (Inference): <%s' %
                                  num2alpha(predicted_ids_infer[0]))
                        else:
                            print('True            : %s' %
                                  num2phone(labels[0]))
                            print('Pred (Training) : < %s' %
                                  num2phone(predicted_ids_train[0]))
                            print('Pred (Inference): < %s' %
                                  num2phone(predicted_ids_infer[0]))

                        if ler_train >= ler_train_pre:
                            not_improved_count += 1
                        else:
                            not_improved_count = 0
                        if not_improved_count >= 5:
                            print('Model is Converged.')
                            break
                        ler_train_pre = ler_train

                duration_global = time.time() - start_time_global
                print('Total time: %.3f sec' % (duration_global))
Example #32
    def check_training(self):
        print('----- multitask -----')
        tf.reset_default_graph()
        with tf.Graph().as_default():
            # Load batch data
            batch_size = 4
            inputs, labels_true_char_st, labels_true_phone_st, inputs_seq_len = generate_data(
                label_type='multitask', model='ctc', batch_size=batch_size)

            # Define placeholders
            inputs_pl = tf.placeholder(tf.float32,
                                       shape=[None, None, inputs.shape[-1]],
                                       name='input')
            indices_pl = tf.placeholder(tf.int64, name='indices')
            values_pl = tf.placeholder(tf.int32, name='values')
            shape_pl = tf.placeholder(tf.int64, name='shape')
            labels_pl = tf.SparseTensor(indices_pl, values_pl, shape_pl)
            indices_second_pl = tf.placeholder(tf.int64, name='indices_second')
            values_second_pl = tf.placeholder(tf.int32, name='values_second')
            shape_second_pl = tf.placeholder(tf.int64, name='shape_second')
            labels_second_pl = tf.SparseTensor(indices_second_pl,
                                               values_second_pl,
                                               shape_second_pl)
            inputs_seq_len_pl = tf.placeholder(tf.int64,
                                               shape=[None],
                                               name='inputs_seq_len')
            keep_prob_input_pl = tf.placeholder(tf.float32,
                                                name='keep_prob_input')
            keep_prob_hidden_pl = tf.placeholder(tf.float32,
                                                 name='keep_prob_hidden')

            # Define model graph
            output_size_main = 26
            output_size_second = 61
            network = Multitask_BLSTM_CTC(
                batch_size=batch_size,
                input_size=inputs[0].shape[1],
                num_unit=256,
                num_layer_main=2,
                num_layer_second=1,
                output_size_main=output_size_main,
                output_size_second=output_size_second,
                main_task_weight=0.8,
                parameter_init=0.1,
                clip_grad=5.0,
                clip_activation=50,
                dropout_ratio_input=1.0,
                dropout_ratio_hidden=1.0,
                num_proj=None,
                weight_decay=1e-6)

            # Add to the graph each operation
            loss_op, logits_main, logits_second = network.compute_loss(
                inputs_pl, labels_pl, labels_second_pl, inputs_seq_len_pl,
                keep_prob_input_pl, keep_prob_hidden_pl)
            learning_rate = 1e-3
            train_op = network.train(loss_op,
                                     optimizer='rmsprop',
                                     learning_rate_init=learning_rate,
                                     is_scheduled=False)
            decode_op_main, decode_op_second = network.decoder(
                logits_main,
                logits_second,
                inputs_seq_len_pl,
                decode_type='beam_search',
                beam_width=20)
            ler_op_main, ler_op_second = network.compute_ler(
                decode_op_main, decode_op_second, labels_pl, labels_second_pl)

            # Add the variable initializer operation
            init_op = tf.global_variables_initializer()

            # Count total parameters
            parameters_dict, total_parameters = count_total_parameters(
                tf.trainable_variables())
            for parameter_name in sorted(parameters_dict.keys()):
                print("%s %d" %
                      (parameter_name, parameters_dict[parameter_name]))
            print("Total %d variables, %s M parameters" %
                  (len(parameters_dict.keys()), "{:,}".format(
                      total_parameters / 1000000)))

            # Make feed dict
            feed_dict = {
                inputs_pl: inputs,
                labels_pl: labels_true_char_st,
                labels_second_pl: labels_true_phone_st,
                inputs_seq_len_pl: inputs_seq_len,
                keep_prob_input_pl: network.dropout_ratio_input,
                keep_prob_hidden_pl: network.dropout_ratio_hidden,
                network.lr: learning_rate
            }

            with tf.Session() as sess:
                # Initialize parameters
                sess.run(init_op)

                # Wrapper for tfdbg
                # sess = tf_debug.LocalCLIDebugWrapperSession(sess)

                # Train model
                max_steps = 400
                start_time_global = time.time()
                start_time_step = time.time()
                ler_train_char_pre = 1
                not_improved_count = 0
                for step in range(max_steps):

                    # Compute loss
                    _, loss_train = sess.run([train_op, loss_op],
                                             feed_dict=feed_dict)

                    # Gradient check
                    # grads = sess.run(network.clipped_grads, feed_dict=feed_dict)
                    # for grad in grads:
                    #     print(np.max(grad))

                    if (step + 1) % 10 == 0:
                        # Change to evaluation mode
                        feed_dict[keep_prob_input_pl] = 1.0
                        feed_dict[keep_prob_hidden_pl] = 1.0

                        # Compute accuracy
                        ler_train_char, ler_train_phone = sess.run(
                            [ler_op_main, ler_op_second], feed_dict=feed_dict)

                        duration_step = time.time() - start_time_step
                        print(
                            'Step %d: loss = %.3f / cer = %.4f / per = %.4f (%.3f sec)\n'
                            % (step + 1, loss_train, ler_train_char,
                               ler_train_phone, duration_step))
                        start_time_step = time.time()

                        # Visualize
                        labels_pred_char_st, labels_pred_phone_st = sess.run(
                            [decode_op_main, decode_op_second],
                            feed_dict=feed_dict)
                        labels_true_char = sparsetensor2list(
                            labels_true_char_st, batch_size=batch_size)
                        labels_true_phone = sparsetensor2list(
                            labels_true_phone_st, batch_size=batch_size)
                        labels_pred_char = sparsetensor2list(
                            labels_pred_char_st, batch_size=batch_size)
                        labels_pred_phone = sparsetensor2list(
                            labels_pred_phone_st, batch_size=batch_size)

                        # character
                        print('Character')
                        print('  True: %s' % num2alpha(labels_true_char[0]))
                        print('  Pred: %s' % num2alpha(labels_pred_char[0]))
                        print('Phone')
                        print('  True: %s' % num2phone(labels_true_phone[0]))
                        print('  Pred: %s' % num2phone(labels_pred_phone[0]))
                        print('----------------------------------------')

                        if ler_train_char >= ler_train_char_pre:
                            not_improved_count += 1
                        else:
                            not_improved_count = 0
                        if not_improved_count >= 5:
                            print('Model is Converged.')
                            break
                        ler_train_char_pre = ler_train_char

                        # Change to training mode
                        network.is_training = True

                duration_global = time.time() - start_time_global
                print('Total time: %.3f sec' % (duration_global))
Example #33
                         embed_dim=128,
                         n_customer=args.n_customer,
                         n_encode_layers=3)
 print(f'model loading time:{time()-t1}s')
 if args.txt is not None:
     datatxt = data_from_txt(args.txt)
     data = []
     for i in range(3):
         elem = [datatxt[i].squeeze(0) for j in range(args.batch)]
         data.append(torch.stack(elem, 0))
 else:
     # data = generate_data(n_samples = 2, n_customer = args.n_customer, seed = args.seed)
     data = []
     for i in range(3):
         elem = [
             generate_data(1, args.n_customer, args.seed)[i]
             for j in range(args.batch)
         ]
         data.append(torch.stack(elem, 0))
 print(f'data generate time:{time()-t1}s')
 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 pretrained = pretrained.to(device)
 data = list(map(lambda x: x.to(device), data))
 pretrained.eval()
 with torch.no_grad():
     costs, _, pi = pretrained(data,
                               return_pi=True,
                               decode_type=args.decode_type)
 print('costs:', costs)
 idx_in_batch = torch.argmin(costs, dim=0)
 print(
Example #34
np.random.seed(0)
LAMBDAS = np.logspace(-2, 2, 13)
xxx = np.linspace(0, 1, 1000)
y_true = true_func(xxx)
results = {
    'error': [],
    'bias^2': [],
    'variance': [],
}
start = time()
for l in LAMBDAS:
    errors = []
    betas = []
    for _ in range(100):
        X, Y = generate_data(length=25, gaussian_noise=0.1)
        beta = fit_polynomial_regression(X, Y, degree=12, l=l)
        if time() - start > 60:
            raise SystemExit()
        betas.append(beta)
        errors.append(mean_square_loss(y_hat(xxx, beta), y_true))

    results['error'].append(np.mean(errors))
    y_hats = np.array([y_hat(xxx, b) for b in betas]).T
    bias_2, var = bias2_variance(y_true, y_hats)
    results['bias^2'].append(bias_2)

    results['variance'].append(var)


def test_best_lambda():