def train_step(source_seq, target_seq, en_initial_states):
    """ Execute one training step (forward pass + backward pass)
    Args:
        source_seq: source sequences
        target_seq: input target sequences (<start> + ... + <end>)
        en_initial_states: initial hidden states of the encoder

    Returns:
        The loss value of the current pass, averaged over timesteps
    """

    loss = 0
    with tf.GradientTape() as tape:
        en_outputs = encoder(source_seq, en_initial_states)
        en_states = en_outputs[1:]
        de_state_h, de_state_c = en_states

        # We need to create a loop to iterate through the target sequences
        for i in range(target_seq.shape[1]):
            # Input to the decoder must have shape of (batch_size, length)
            # so we need to expand one dimension
            decoder_in = tf.expand_dims(target_seq[:, i], 1)
            logit, de_state_h, de_state_c, _ = decoder(
                decoder_in, (de_state_h, de_state_c), en_outputs[0])

            # The loss is accumulated over all timesteps of the batch
            loss += loss_func(target_seq[:, i], logit)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss / target_seq.shape[1]
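loss_func is defined elsewhere in this project. As a point of reference only, a minimal sketch of a padding-masked sparse categorical cross-entropy that fits the loss_func(targets, logits) calls above might look like the following; the zero-padding convention is an assumption, not something shown in the original.

import tensorflow as tf

crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

def loss_func(targets, logits):
    # Ignore padded (id 0) positions when averaging the cross-entropy.
    mask = tf.cast(tf.math.not_equal(targets, 0), tf.float32)
    return crossentropy(targets, logits, sample_weight=mask)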
Example #2
    def _get_loss(self, batch):
        self.model.train()
        batch = self._process_batch(batch)

        label_ids = batch['label_ids']
        logits = self.model(batch)
        loss = loss_func(logits, label_ids)

        return loss
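In this trainer, loss_func is defined outside the snippet. For integer label_ids and class logits it is plausibly a cross-entropy; a minimal sketch under that assumption (not the project's actual definition):

import torch.nn.functional as F

def loss_func(logits, label_ids):
    # Cross-entropy between class logits of shape (batch, num_classes)
    # and integer label ids of shape (batch,).
    return F.cross_entropy(logits, label_ids)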
Example #3
    def _get_preds(self, batch):
        self.model.eval()
        batch = self._process_batch(batch)
        with torch.no_grad():
            logits = self.model(batch)
        label_ids = batch['label_ids']

        loss_val = loss_func(logits, label_ids).item()
        label_ids, pred_ids, probs = decode(label_ids, logits)

        return label_ids, pred_ids, probs, loss_val
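decode is another project helper that is not shown. Assuming logits of shape (batch, num_classes), a sketch that returns the three values used above could be:

import torch
import torch.nn.functional as F

def decode(label_ids, logits):
    # Softmax probabilities and argmax predictions per example.
    probs = F.softmax(logits, dim=-1)
    pred_ids = torch.argmax(probs, dim=-1)
    return label_ids, pred_ids, probs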
Example #4
def main(n_samples, n_features):
    data_path = "data_{}k_{}.tsv".format(n_samples, n_features)
    n_samples *= 1000

    
    points, features, headings = tsv_points_features(data_path)
    n_features = features.shape[-1]
    y = points
    X = features

    w = stochastic_gradient_descent(n_features, n_samples, X, y)

    loss = loss_func(X, y, w)
    print("Loss {} samples: {}".format(n_samples, loss))

    output(w, "Q3")
Example #5
def train_step(source_seq, target_seq, en_initial_states):
    loss = 0
    with tf.GradientTape() as tape:
        en_outputs = encoder(source_seq, en_initial_states)
        en_states = en_outputs[1:]
        de_states = en_states

        de_outputs = decoder(target_seq, de_states)
        logits = de_outputs[0]
        loss = loss_func(target_seq, logits)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss
Example #6
def main(n_samples, n_features):
    data_path = "data_{}k_{}.tsv".format(n_samples, n_features)
    n_samples *= 1000

    points, features, headings = tsv_points_features(data_path)

    n_features = features.shape[-1]

    y = points
    X = features

    w = np.linalg.inv(X.T @ X) @ (X.T @ y)

    loss = loss_func(X, y, w)
    print("Loss:", loss)

    output(w, "Q1")
Example #7
def pretrain_generator(model_dict,
                       optimizer_dict,
                       scheduler_dict,
                       dataloader,
                       vocab_size,
                       max_norm=5.0,
                       use_cuda=False):
    '''
    Get models, optimizers and schedulers.
    '''
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]

    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    '''
    Perform pretrain step for real data.
    '''
    for i, sample in enumerate(dataloader):
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)

        # Calculate pretrain loss.
        pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
        real_goal = pre_rets["real_goal"]
        prediction = pre_rets["prediction"]
        delta_feature = pre_rets["delta_feature"]

        m_loss = loss_func("pre_manager")(real_goal, delta_feature)
        torch.autograd.grad(m_loss, manager.parameters())
        clip_grad_norm_(manager.parameters(), max_norm=max_norm)
        m_optimizer.step()
        m_optimizer.zero_grad()

        w_loss = loss_func("pre_worker")(sample, prediction, vocab_size,
                                         use_cuda)
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(worker.parameters(), max_norm=max_norm)
        w_optimizer.step()
        w_optimizer.zero_grad()
    '''
    Update model_dict, optimizer_dict and scheduler_dict.
    '''
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
Example #8
def adversarial_train(model_dict,
                      optimizer_dict,
                      scheduler_dict,
                      dis_dataloader_params,
                      vocab_size,
                      pos_file,
                      neg_file,
                      batch_size,
                      gen_train_num=1,
                      dis_train_epoch=5,
                      dis_train_num=3,
                      max_norm=5.0,
                      rollout_num=4,
                      use_cuda=False,
                      temperature=1.0,
                      epoch=1,
                      tot_epoch=100):
    """
        Get all the models, optimizer and schedulers
    """
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]

    # Only the manager and worker optimizers are zeroed here; the
    # discriminator optimizer is zeroed inside its own training loop below.
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    #Adversarial training for generator
    for _ in range(gen_train_num):
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        #get all the return values
        adv_rets = recurrent_func("adv")(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]

        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)
        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)

        # Compute gradients of each loss w.r.t. its sub-module's parameters.
        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(manager.parameters(), max_norm)
        clip_grad_norm_(worker.parameters(), max_norm)
        m_optimizer.step()
        w_optimizer.step()
        print("Adv-Manager loss: {:.5f} Adv-Worker loss: {:.5f}".format(
            m_loss, w_loss))

    del adv_rets
    del real_goal
    del all_goal
    del prediction
    del delta_feature
    del delta_feature_for_worker
    del gen_token
    del rewards

    #Adversarial training for discriminator
    for n in range(dis_train_epoch):
        generate_samples(model_dict, neg_file, batch_size, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = pos_file
        dis_dataloader_params["negative_filepath"] = neg_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()
        """
        for d-steps do
            Use current G, θm,θw to generate negative examples and combine with given positive examples S 
            Train discriminator Dφ for k epochs by Eq. (2)
        end for
        """
        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                data = Variable(data)
                label = Variable(label)
                if use_cuda:
                    data = data.cuda(non_blocking=True)
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"],
                                     label.view(-1)) + discriminator.l2_loss()
                d_optimizer.zero_grad()
                d_lr_scheduler.step()
                loss.backward()
                d_optimizer.step()
        print("{}/{} Adv-Discriminator Loss: {:.5f}".format(
            n, range(dis_train_epoch), loss))
    #Save all changes
    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["disciminator"] = d_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
Example #9
def pretrain_generator(model_dict,
                       optimizer_dict,
                       scheduler_dict,
                       dataloader,
                       vocab_size,
                       max_norm=5.0,
                       use_cuda=False,
                       epoch=1,
                       tot_epochs=100):
    #get the models of generator
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager
    #get the optimizers
    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]

    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    """
     Perform pretrain step for real data
    """

    for i, sample in enumerate(dataloader):
        #print("DataLoader: {}".format(dataloader))
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)

        # Calculate pretrain loss. The last batch can be smaller than 64
        # (e.g. 16), so skip any batch whose shape is not exactly (64, 20).
        if sample.size() == torch.Size([64, 20]):
            #print("Sample size: {}".format(sample.size()))
            pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
            real_goal = pre_rets["real_goal"]
            prediction = pre_rets["prediction"]
            delta_feature = pre_rets["delta_feature"]

            m_loss = loss_func("pre_manager")(real_goal, delta_feature)
            torch.autograd.grad(m_loss, manager.parameters())
            clip_grad_norm_(manager.parameters(), max_norm=max_norm)
            m_optimizer.step()
            m_optimizer.zero_grad()

            w_loss = loss_func("pre_worker")(sample, prediction, vocab_size,
                                             use_cuda)
            torch.autograd.grad(w_loss, worker.parameters())
            clip_grad_norm_(worker.parameters(), max_norm=max_norm)
            w_optimizer.step()
            w_optimizer.zero_grad()
            if i == 63:
                print("Pre-Manager Loss: {:.5f}, Pre-Worker Loss: {:.5f}\n".
                      format(m_loss, w_loss))
    """
    Update model_dict, optimizer_dict, and scheduler_dict
    """

    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
Example #10
File: main.py Project: prof-Jian/J_lab
def pretrain_generator(model_dict,
                       optimizer_dict,
                       scheduler_dict,
                       dataloader,
                       vocab_size,
                       max_norm=5.0,
                       use_cuda=False,
                       epoch=1,
                       tot_epochs=100):
    #get the models of generator
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager
    #get the optimizers
    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]

    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    """
     Perform pretrain step for real data
    """

    for i, sample in enumerate(dataloader):
        #print("DataLoader: {}".format(dataloader))
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda()

        # Calculate pretrain loss. The last batch can be smaller than 64
        # (e.g. 16), so skip any batch whose shape is not exactly (64, 20);
        # checking only size(0) == 64 would be cheaper. Here sample has
        # shape [batch_size, seq_len].
        if sample.size() == torch.Size([64, 20]):
            #print("Sample size: {}".format(sample.size()))
            pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
            real_goal = pre_rets["real_goal"]
            prediction = pre_rets["prediction"]
            delta_feature = pre_rets["delta_feature"]

            # real_goal and delta_feature give the manager loss; prediction
            # and sample give the worker loss.
            m_loss = loss_func("pre_manager")(real_goal, delta_feature)
            # This call does not change the .grad of manager.parameters(),
            # so what is it actually for?
            torch.autograd.grad(m_loss, manager.parameters())
            clip_grad_norm_(manager.parameters(), max_norm=max_norm)
            m_optimizer.step()
            m_optimizer.zero_grad()
            # My current understanding of the four lines above: the first two
            # exist only to clip the gradients. This differs from the usual
            # loop of optimizer.zero_grad() -> loss -> loss.backward() ->
            # optimizer.step(). Is loss.backward() not needed here?

            w_loss = loss_func("pre_worker")(sample, prediction, vocab_size,
                                             use_cuda)
            # This computes d(w_loss)/d(worker.parameters()) but does not
            # assign the result to any variable.
            torch.autograd.grad(w_loss, worker.parameters())
            # Why is this hyperparameter set to 5?
            clip_grad_norm_(worker.parameters(), max_norm=max_norm)
            w_optimizer.step()
            w_optimizer.zero_grad()
            if i == 63:
                print("Pre-Manager Loss: {:.5f}, Pre-Worker Loss: {:.5f}\n".
                      format(m_loss, w_loss))
    """
    Update model_dict, optimizer_dict, and scheduler_dict
    """

    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
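The questions in the comments above are reasonable: torch.autograd.grad returns the gradients but does not write them into the parameters' .grad fields, so the subsequent clipping and optimizer.step() appear to act on unchanged gradients. For comparison only, and not as a claim about the author's intent, the standard clipped-gradient update usually looks like this sketch:

from torch.nn.utils import clip_grad_norm_

def clipped_update(loss, module, optimizer, max_norm=5.0, retain_graph=False):
    # Standard pattern: backprop into .grad, clip, then step.
    optimizer.zero_grad()
    loss.backward(retain_graph=retain_graph)  # writes gradients into p.grad
    clip_grad_norm_(module.parameters(), max_norm)
    optimizer.step()

In the loop above this would be called as clipped_update(m_loss, manager, m_optimizer, retain_graph=True) followed by clipped_update(w_loss, worker, w_optimizer), since both losses come from the same forward pass.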
Example #11
def run(trainFile, trainLabelFile, testFile, testLabelFile, groupFile, suspFile, featureDistribution, loss):
    # reset graph                                                                                                                                            
    tf.reset_default_graph()    
    # Network Parameters                                                                                                                                     
    n_input = numpy.array(featureDistribution).max()
    n_steps = len(featureDistribution)
    n_hidden = numpy.array(featureDistribution).max()
    n_classes = 2 # number of output classes                                                                                                                 

    # tf Graph input                                                                                                                                         
    x = tf.placeholder("float", [None, n_steps, n_input])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])

    # dropout                                                                                                                                                 
    keep_prob = tf.placeholder(tf.float32)

    # Define weights                                                                                                                                          
    weights = {
        # Hidden layer weights => 2*n_hidden because of forward + backward cells
        'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    pred = BiRNN(x, weights, biases, n_hidden, n_steps, keep_prob)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 

    # load datasets
    datasets = input.read_data_sets(trainFile,trainLabelFile, testFile,testLabelFile, groupFile)
    
    # load test data
    test_data=myrnn.fillMatrix(datasets.test.instances,featureDistribution)
    test_data = test_data.reshape((-1, n_steps, n_input))
    test_label = datasets.test.labels

    # Define loss and optimizer                                                                                                                                                                             
    variables  = tf.trainable_variables()
    regularizer = tf.add_n([ tf.nn.l2_loss(v) for v in variables if 'bias' not in v.name]) * L2_value  # l2 regularization   
    cost = ut.loss_func(pred, y, loss, datasets, g)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost+regularizer)

    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)
        step = 1
        # Keep training until reach max iterations
        #while step * batch_size < training_epochs*:
        total_batch = int(datasets.train.num_instances/batch_size)
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(batch_size)
                # Reshape data to get 28 seq of 28 elements
                batch_x = myrnn.fillMatrix(batch_x,featureDistribution)
                batch_x = batch_x.reshape((batch_size, n_steps, n_input))
                # Run optimization op (backprop)
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, g: batch_g, keep_prob:dropout_rate})
                # Compute average loss
                avg_cost += c / total_batch
            if epoch % display_step == 0 and i == (total_batch - 1):
                print("Epoch " + str(epoch+1) + ", cost = " + "{:.6f}".format(avg_cost))
            if epoch % (dump_step) == (dump_step-1):
                res=sess.run(tf.nn.softmax(pred),feed_dict={x: test_data, y: test_label, keep_prob:1.0})
                with open(suspFile+'-'+str(epoch+1),'w') as f:
                    for susp in res[:,0]:
                        f.write(str(susp)+'\n')
        print("Optimization Finished!")
Example #12
def train(float_model, predict, pred_dir, tblogs_dir, batchsize, learnrate,
          epochs):
    '''
    Variational autoencoder model
    '''

    image_dim = 28
    image_chan = 1
    input_layer = Input(shape=(image_dim, image_dim, image_chan))
    encoder_mu, encoder_log_variance, encoder_z = encoder.call(input_layer)

    dec_out = decoder.call(encoder_z)
    model = Model(inputs=input_layer, outputs=dec_out)
    '''
    Prepare MNIST dataset
    '''
    x_train, x_test, x_train_noisy, x_test_noisy = mnist_download()
    train_dataset = input_fn((x_train_noisy, x_train), batchsize, True)
    test_dataset = input_fn((x_test_noisy, x_test), batchsize, False)
    predict_dataset = input_fn((x_test_noisy), batchsize, False)
    '''
    Call backs
    '''
    tb_call = TensorBoard(log_dir=tblogs_dir)
    chkpt_call = ModelCheckpoint(filepath=float_model,
                                 monitor='val_mse',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True)

    callbacks_list = [tb_call, chkpt_call]
    '''
    Compile
    '''
    model.compile(optimizer=Adam(lr=learnrate),
                  loss=lambda y_true, y_predict: loss_func(
                      y_true, y_predict, encoder_mu, encoder_log_variance),
                  metrics=['mse'])
    '''
    Training
    '''
    print(_DIVIDER)
    print('Training...')
    print(_DIVIDER)
    # make folder for saving trained model checkpoint
    os.makedirs(os.path.dirname(float_model), exist_ok=True)

    # remake Tensorboard logs folder
    shutil.rmtree(tblogs_dir, ignore_errors=True)
    os.makedirs(tblogs_dir)

    train_history = model.fit(train_dataset,
                              epochs=epochs,
                              steps_per_epoch=len(x_train) // batchsize,
                              validation_data=test_dataset,
                              callbacks=callbacks_list,
                              verbose=1)
    '''
    Predictions
    '''
    if (predict):
        print(_DIVIDER)
        print('Making predictions...')
        print(_DIVIDER)
        # remake predictions folder
        shutil.rmtree(pred_dir, ignore_errors=True)
        os.makedirs(pred_dir)

        with custom_object_scope({'Sampling': Sampling}):
            model = load_model(float_model,
                               compile=False,
                               custom_objects={'Sampling': Sampling})
        model.compile(loss=lambda y_true, y_predict: loss_func(
            y_true, y_predict, encoder_mu, encoder_log_variance))
        predictions = model.predict(predict_dataset, verbose=1)

        # scale pixel values back up to range 0:255 then save as PNG
        for i in range(20):
            cv2.imwrite(pred_dir + '/pred_' + str(i) + '.png',
                        predictions[i] * 255.0)
            cv2.imwrite(pred_dir + '/input_' + str(i) + '.png',
                        x_test_noisy[i] * 255.0)
        print('Inputs and Predictions saved as images in ./' + pred_dir)

    print(
        "\nTensorBoard can be opened with the command: tensorboard --logdir=./tb_logs --host localhost --port 6006"
    )

    return
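The loss_func used in the compile calls is imported from elsewhere; given the signature loss_func(y_true, y_pred, encoder_mu, encoder_log_variance), a typical VAE objective is a reconstruction term plus the KL divergence of the latent Gaussian. A sketch under that assumption (the MSE reconstruction term and the 1:1 weighting are guesses, not the project's definition):

import tensorflow as tf

def loss_func(y_true, y_pred, encoder_mu, encoder_log_variance):
    # Per-image MSE reconstruction term.
    recon = tf.reduce_mean(tf.square(y_true - y_pred), axis=[1, 2, 3])
    # KL divergence between N(mu, exp(log_var)) and N(0, 1), per sample.
    kl = -0.5 * tf.reduce_sum(
        1.0 + encoder_log_variance
        - tf.square(encoder_mu)
        - tf.exp(encoder_log_variance), axis=1)
    return recon + kl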
Example #13
def run(trainFile, trainLabelFile, testFile, testLabelFile, groupFile,
        suspFile, loss, featureNum, nodeNum):
    tf.reset_default_graph()
    # Network Parameters
    n_classes = 2  #  total output classes (0 or 1)
    n_input = featureNum  # total number of input features
    n_hidden_1 = nodeNum  # 1st layer number of nodes
    train_writer = tf.summary.FileWriter("./log", graph=tf.get_default_graph())
    # tf Graph input
    x = tf.placeholder("float", [None, 226])
    spec = tf.placeholder("float", [None, 34])
    mutation1 = tf.placeholder("float", [None, 35])
    mutation2 = tf.placeholder("float", [None, 35])
    mutation3 = tf.placeholder("float", [None, 35])
    mutation4 = tf.placeholder("float", [None, 35])
    mutation = tf.placeholder("float", [None, 140])
    complexity = tf.placeholder("float", [None, 37])
    similarity = tf.placeholder("float", [None, 15])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])
    is_training = tf.placeholder(tf.bool, name='is_training')

    # dropout parameter
    keep_prob = tf.placeholder(tf.float32)

    # Construct model
    pred = mutation_spec_first(spec, mutation1, mutation2, mutation3,
                               mutation4, complexity, similarity, keep_prob,
                               is_training)
    datasets = input.read_data_sets(trainFile, trainLabelFile, testFile,
                                    testLabelFile, groupFile)

    # Define loss and optimizer
    regu_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    y = tf.stop_gradient(y)
    cost = ut.loss_func(pred, y, loss, datasets, g)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    summary_op = tf.summary.merge_all()
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(cost + regu_losses)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(datasets.train.num_instances / batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(
                    batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)

                _, c, regu_loss = sess.run(
                    [optimizer, cost, regu_losses],
                    feed_dict={
                        spec: batch_x[:, :34],
                        mutation1: batch_x[:, 34:69],
                        mutation2: batch_x[:, 69:104],
                        mutation3: batch_x[:, 104:139],
                        mutation4: batch_x[:, 139:174],
                        complexity: batch_x[:, 174:211],
                        similarity: batch_x[:, -15:],
                        y: batch_y,
                        g: batch_g,
                        keep_prob: dropout_rate,
                        is_training: True
                    })
                # Compute average loss
                avg_cost += c / total_batch
            # Display logs per epoch step

            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "cost=", \
                    "{:.9f}".format(avg_cost),", l2 loss= ",numpy.sum(regu_loss))

            if epoch % dump_step == (dump_step - 1):
                #Write Result

                res, step_summary = sess.run(
                    [tf.nn.softmax(pred), summary_op],
                    feed_dict={
                        spec: datasets.test.instances[:, :34],
                        mutation1: datasets.test.instances[:, 34:69],
                        mutation2: datasets.test.instances[:, 69:104],
                        mutation3: datasets.test.instances[:, 104:139],
                        mutation4: datasets.test.instances[:, 139:174],
                        complexity: datasets.test.instances[:, 174:211],
                        similarity: datasets.test.instances[:, -15:],
                        y: datasets.test.labels,
                        keep_prob: 1.0,
                        is_training: False
                    })
                train_writer.add_summary(step_summary)
                with open(suspFile + '-' + str(epoch + 1), 'w') as f:
                    for susp in res[:, 0]:
                        f.write(str(susp) + '\n')
Example #14
    dataloader = DataLoader(dataset,
                            batch_size=cfg.TRAIN_BATCH_SIZE,
                            shuffle=True,
                            num_workers=cfg.NUM_WORKERS)

    net = Net().to(device)
    optimizer = torch.optim.Adam(net.parameters())

    for epoch in range(cfg.EPOCH):
        for i, (target_13, target_26, target_52,
                input) in enumerate(dataloader):
            target_13, target_26, target_52, input = target_13.to(
                device), target_26.to(device), target_52.to(device), input.to(
                    device)
            output_13, output_26, output_52 = net(input)

            loss_13 = utils.loss_func(output_13, target_13, cfg.alpha)
            loss_26 = utils.loss_func(output_26, target_26, cfg.alpha)
            loss_52 = utils.loss_func(output_52, target_52, cfg.alpha)

            loss = loss_13 + loss_26 + loss_52  # total loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(" loss:", loss.item(), " loss_13:", loss_13.item(),
                  " loss_26:", loss_26.item(), "loss_52", loss_52.item())
            if epoch % 10 == 0:
                torch.save(net, "net.pth")
                print("epoch {} save success".format(epoch))
Example #15
File: test.py Project: xiandshi/Music
def test_loss_func(use_cuda=False):
    '''
    Prepare model_dict.
    '''
    model_dict = prepare_model_dict(use_cuda)
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager
    '''
    Prepare some fake data.
    '''
    dataloader = prepare_fake_data()
    '''
    Start testing all recurrent functions.
    '''

    m_optimizer = optim.Adam(manager.parameters(), lr=0.001)
    w_optimizer = optim.Adam(worker.parameters(), lr=0.001)

    m_optimizer.zero_grad()
    w_optimizer.zero_grad()
    for i, sample in enumerate(dataloader):
        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)

        # Test pre.
        pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
        real_goal = pre_rets["real_goal"]
        prediction = pre_rets["prediction"]
        delta_feature = pre_rets["delta_feature"]

        m_loss = loss_func("pre_manager")(real_goal, delta_feature)
        torch.autograd.grad(m_loss, manager.parameters())
        nn.utils.clip_grad_norm_(manager.parameters(), max_norm=5.0)
        m_optimizer.step()
        m_optimizer.zero_grad()

        w_loss = loss_func("pre_worker")(sample, prediction, 5000, use_cuda)
        torch.autograd.grad(w_loss, worker.parameters())
        nn.utils.clip_grad_norm_(worker.parameters(), max_norm=5.0)
        w_optimizer.step()
        w_optimizer.zero_grad()
        print("pre_m_loss={}, pre_w_loss={}".format(m_loss.data[0],
                                                    w_loss.data[0]))
        print("Pretrain loss function test  finished!")
        print("\n")

        # Test adv.
        adv_rets = recurrent_func('adv')(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]
        rewards = get_rewards(model_dict, gen_token, 4, use_cuda)

        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, 5000, use_cuda)

        m_optimizer = optim.Adam(manager.parameters(), lr=0.001)
        w_optimizer = optim.Adam(worker.parameters(), lr=0.001)

        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        nn.utils.clip_grad_norm_(manager.parameters(), max_norm=5.0)
        nn.utils.clip_grad_norm_(worker.parameters(), max_norm=5.0)
        m_optimizer.step()
        w_optimizer.step()

        print("adv_m_loss={}, adv_w_loss={}".format(m_loss.data[0],
                                                    w_loss.data[0]))
        print("Adversarial training loss function test finished!")
        print("\n")

        if i > 0:
            break
Example #16
def adversarial_train(model_dict,
                      optimizer_dict,
                      scheduler_dict,
                      dis_dataloader_params,
                      vocab_size,
                      positive_file,
                      negative_file,
                      num_batches,
                      gen_train_num=1,
                      dis_train_epoch=5,
                      dis_train_num=3,
                      max_norm=5.0,
                      rollout_num=4,
                      use_cuda=False,
                      temperature=1.0):
    '''
    Get models, optimizers and schedulers.
    '''
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]

    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]
    '''
    Adversarial train for generator.
    '''
    for _ in range(gen_train_num):
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        adv_rets = recurrent_func('adv')(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]
        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)

        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)

        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(manager.parameters(), max_norm=max_norm)
        clip_grad_norm_(worker.parameters(), max_norm=max_norm)
        m_optimizer.step()
        w_optimizer.step()

    del adv_rets
    del real_goal
    del all_goal
    del prediction
    del delta_feature
    del delta_feature_for_worker
    del gen_token
    del rewards
    '''
    Adversarial train for discriminator.
    '''
    for _ in range(dis_train_epoch):
        generate_samples(model_dict, negative_file, num_batches, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = positive_file
        dis_dataloader_params["negative_filepath"] = negative_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()

        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                data = Variable(data)
                label = Variable(label)
                if use_cuda:
                    data = data.cuda(non_blocking=True)
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"], label.view(-1)) + \
                       discriminator.l2_loss()
                d_optimizer.zero_grad()
                d_lr_scheduler.step()
                loss.backward()
                d_optimizer.step()

    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["discriminator"] = d_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
Example #17
def run(trainFile, trainLabelFile, testFile,testLabelFile, groupFile, suspFile,loss, featureNum, nodeNum):
    tf.reset_default_graph()
    # Network Parameters
    n_classes = 2 #  total output classes (0 or 1)
    n_input = featureNum # total number of input features
    n_hidden_1 = nodeNum # 1st layer number of nodes                                                                       
    
    # tf Graph input
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])
  
    # dropout parameter
    keep_prob = tf.placeholder(tf.float32)

    # Store layers weight & bias
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Construct model
    pred = multilayer_perceptron(x, weights, biases, keep_prob)
   
    datasets=input.read_data_sets(trainFile, trainLabelFile, testFile, testLabelFile, groupFile)


    # Define loss and optimizer                                                                                                                                                                               
    variables  = tf.trainable_variables()
    regularizer = (tf.nn.l2_loss(weights['h1'])+tf.nn.l2_loss(weights['out'])) * L2_value   # l2 regularization               
    cost = ut.loss_func(pred, y, loss, datasets,g)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost+regularizer)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(datasets.train.num_instances/batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                              y: batch_y, g: batch_g, keep_prob: dropout_rate})
                # Compute average loss
                avg_cost += c / total_batch
            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "cost=", \
                    "{:.9f}".format(avg_cost))
            if epoch % dump_step ==(dump_step-1):
                #Write Result
                res=sess.run(tf.nn.softmax(pred),feed_dict={x: datasets.test.instances, y: datasets.test.labels, keep_prob: 1.0})
                with open(suspFile+'-'+str(epoch+1),'w') as f:
                    for susp in res[:,0]:
                        f.write(str(susp)+'\n')

        print("Optimization Finished!")
Example #18
def main(args):
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(args.local_rank)

    ds = dataset(args.data_file, args.class_file, config)
    sampler = torch.utils.data.distributed.DistributedSampler(ds, shuffle=True)
    dl = DataLoader(ds,
                    batch_size=args.batch_size,
                    num_workers=args.num_workers,
                    collate_fn=collate_fn,
                    pin_memory=True,
                    drop_last=False,
                    sampler=sampler)

    batch_save_path = f"{args.model_dir}/batch_4.pth"
    epoch_save_path = f"{args.model_dir}/epoch_4.pth"

    net = model(config, ds.num_classes)
    if os.path.isfile(batch_save_path):
        log("Loading model...", args.log_detail_path, args)
        try:
            net.load_state_dict(torch.load(batch_save_path))
            log("Model loaded!", args.log_detail_path, args)
        except Exception as e:
            log(f"{e}\nFailed to load model: {batch_save_path}", args.log_detail_path, args)
    else:
        log(f"Model not found: {batch_save_path}", args.log_detail_path, args)

    config["cuda"] = config["cuda"] and torch.cuda.is_available()
    if config["cuda"]:
        # net = torch.nn.DataParallel(net.cuda())
        net = torch.nn.parallel.DistributedDataParallel(
            net.cuda(), device_ids=[args.local_rank])
        log("cuda", args.log_detail_path, args)

    criterion = loss_func(config)

    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=1,
                                                     verbose=True,
                                                     factor=args.lr_decay,
                                                     threshold=1e-3)
    schedule_loss = []

    net.train()
    for epoch in range(1, args.epochs + 1):
        log(f"{'='*30}\n[{epoch}|{args.epochs}]", args.log_detail_path, args)
        for num_batch, batch_data in enumerate(dl, 1):
            t = time.time()
            loss, box_loss, landmark_loss, cls_loss = train_batch(
                net, batch_data, criterion, optimizer, config["cuda"], args)
            t = time.time() - t

            loss, box_loss, landmark_loss, cls_loss = [
                reduce_tensor(i).item()
                for i in [loss, box_loss, landmark_loss, cls_loss]
            ]

            msg = f"  [{epoch}|{args.epochs}] num_batch:{num_batch}" \
                + f" loss:{loss:.4f} box_loss:{box_loss:.4f} landmark_loss:{landmark_loss:.4f} cls_loss:{cls_loss:.4f} time:{t*1000:.1f}ms"
            log(msg, args.log_detail_path, args)
            if num_batch % args.num_show == 0:
                log(msg, args.log_path, args)

            if args.local_rank == 0:
                if num_batch % args.num_save == 0:
                    save_model(net, batch_save_path)

                schedule_loss += [loss]
                if num_batch % args.num_adjuest_lr == 0:
                    scheduler.step(np.mean(schedule_loss))
                    schedule_loss = []
        if args.local_rank == 0:
            save_model(net, epoch_save_path)
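reduce_tensor, save_model, train_batch and log are project helpers that are not shown here. As an illustration only, a common implementation of reduce_tensor averages a metric across all ranks of the process group, roughly:

import torch.distributed as dist

def reduce_tensor(t):
    # Average a (scalar) tensor across all distributed ranks.
    rt = t.clone().detach()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= dist.get_world_size()
    return rt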