Example #1
def captioning(image_path):
    
    loader = data_loader(
        features_shape=2048,
        attention_features_shape=64,
        batch_size=256,
        buffer_size=1000,
        top_k=5000
    )
    

    ## load model and checkpoint
    embedding_matrix = np.load("./content/drive/My Drive/datasets/embeddingmatrix.npy")
    encoder = Encoder(200)
    decoder = Decoder(embedding_dim=200, vocab_size=loader.top_k + 1, units=512, embedding_matrix = embedding_matrix)
    optimizer = tf.keras.optimizers.Adam()
    checkpoint_path = "./content/drive/My Drive/datasets/modelcheckpoint/embedding"
    ckpt = tf.train.Checkpoint(encoder=encoder, decoder=decoder, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)

    ## inference time
    result, _ = evaluate(
        encoder,
        decoder,
        loader.tokenizer,
        loader.max_length,
        loader.attention_features_shape,
        image_path
    )
    result = " ".join(result)
    return result
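
A hypothetical invocation of the captioning function above (the image path is an illustrative assumption, not from the original project):

caption = captioning("./content/drive/My Drive/datasets/sample.jpg")  # hypothetical path
print(caption)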
    
Example #2
def test(args):

    ##  Load data & Create batch
    clean_data, noisy_data = dt.data_loader(test=True, need_length=True)
    # Batch
    #  - The processed speech interval can be adjusted via "start_frame" and "stop_frame".
    #  - "None" -> All speech in the test dataset.
    batches_test = dt.create_batch_test(clean_data, noisy_data, start_frame=None, stop_frame=None)
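    #  - As an illustrative (hypothetical) alternative, a fixed interval could be requested
    #    instead of the whole test set, e.g.:
    #      batches_test = dt.create_batch_test(clean_data, noisy_data, start_frame=0, stop_frame=16000)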
    del clean_data, noisy_data

    ##  Create network
    # Variables
    noisy_t     = nn.Variable(batches_test.noisy.shape)          # Input
    z           = nn.Variable([batches_test.noisy.shape[0], 1024, 8])  # Random Latent Variable
    # Network (Only Generator)
    output_t = Generator(noisy_t, z)

    ##  Load parameter
    # load generator
    with nn.parameter_scope("gen"):
        print(args.epoch)
        nn.load_parameters(os.path.join(args.model_save_path, "generator_param_{:04}.h5".format(args.epoch)))

    ##  Validation
    noisy_t.d = batches_test.noisy
    #z.d = np.random.randn(*z.shape)
    z.d = np.zeros(z.shape)             # zero latent variables

    output_t.forward()

    ##  Create wav files
    dt.wav_write('clean.wav', batches_test.clean.flatten(), fs=16000)
    dt.wav_write('input_segan.wav', batches_test.noisy.flatten(), fs=16000)
    dt.wav_write('output_segan.wav', output_t.d.flatten(), fs=16000)
    print('finish!')
Example #3
def train_img(args, G):
    """ train function """

    since = time.time()
    """ Start iteration """
    for epoch in range(1, args.epoch_max + 1):
        """ run 1 epoch and get loss """
        train_loader, valid_loader, test_loader = data_loader(args)
        train_loss = iteration(args, G, train_loader, phase="train")
        valid_loss = iteration(args, G, valid_loader, phase="valid")
        """ Print loss """
        if (epoch % args.save_cycle_of_loss) == 0:
            print_loss(epoch, time.time() - since, train_loss, valid_loss)
            record_on_csv(args, epoch,
                          time.time() - since, train_loss, valid_loss)
        """ Print image """
        if (epoch % args.save_cycle_of_images) == 0:
            # visualize_conv_layer(args, G)
            test(args, G, test_loader, epoch)
        """ Change the ratio of losses """
        # if epoch == args.change_cycle_of_loss_ratio:
        #    args.loss_ratio = 0
        """ Decay Learning Rate """
        # if (epoch % args.decay_cycle_of_learning_rate) == 0:
        #     args.learning_rate = args.learning_rate/args.decay_coefficient_of_learning_rate
        """ Save model """
        if (epoch % args.save_cycle_of_models) == 0:
            torch.save(
                G.state_dict(), args.save_path_of_models +
                "/HGN_train_continued" + str(epoch) + ".pt")

    print('======================[ train finished ]======================')
Example #4
def main():

    print("load csv into a pandas dataframe")
    dt = data_loader()
    data = dt.load_data()
    print(f"data has {data.shape}")
    data = dt.encode_target(data)
    print(
        "preprocess data by removing outliers and encoding feature variables")
    data = dt.preprocess(data)
    #print(data.columns)
    print(
        "scale data using standardscaler and encoding using pandas get_dummies"
    )
    data = dt.scale_columns(data)
    print(f"data contains {data.columns}")

    sam = resample_data()
    data = sam.under_sample(data)

    print(data['y'].value_counts())

    s = split()
    data = s.train_test(data)
    print(data[0].shape)

    classifiers_cv(data[0], data[1], data[2], data[3])
Example #5
def test(args):
    """
    Training
    """

    ##  ~~~~~~~~~~~~~~~~~~~
    ##   Initial settings
    ##  ~~~~~~~~~~~~~~~~~~~

    #   Input Variable
    nn.clear_parameters()  # Clear
    Input = nn.Variable([1, 3, 64, 64])  # Input
    Trues = nn.Variable([1, 1])  # True Value

    #   Network Definition
    Name = "CNN"  # Name of scope which includes network models (arbitrary)
    Output_test = network(Input, scope=Name, test=True)  # Network & Output
    Loss_test = F.mean(F.absolute_error(
        Output_test, Trues))  # Loss Function (Absolute Error)

    #   Load parameters
    with nn.parameter_scope(Name):
        nn.load_parameters(
            os.path.join(args.model_save_path,
                         "network_param_{:04}.h5".format(args.epoch)))

    # Training Data Setting
    image_data, mos_data = dt.data_loader(test=True)
    batches = dt.create_batch(image_data, mos_data, 1)
    del image_data, mos_data

    truth = []
    result = []
    for j in range(batches.iter_n):
        Input.d, trues = next(batches)
        Loss_test.forward(clear_no_need_grad=True)
        result.append(Loss_test.d)
        truth.append(trues)

    result = np.array(result)
    truth = np.squeeze(np.array(truth))

    # Evaluation of performance
    mae = np.average(np.abs(result - truth))
    SRCC, p1 = stats.spearmanr(truth,
                               result)  # Spearman's Correlation Coefficient
    PLCC, p2 = stats.pearsonr(truth, result)

    #   Display
    print("\n Model Parameter [epoch={0}]".format(args.epoch))
    print(" Mean Absolute Error with Truth: {0:.4f}".format(mae))
    print(" Speerman's Correlation Coefficient: {0:.3f}".format(SRCC))
    print(" Pearson's Linear Correlation Coefficient: {0:.3f}".format(PLCC))
Example #6
def __init__(self):
    self.flags = FLAGS
    self.img_height = self.flags.img_height
    self.img_width = self.flags.img_width
    self.num_classes = self.flags.num_classes
    self.batch_size = self.flags.batch_size
    self.nEpochs = self.flags.epochs
    # self.is_training = self.flags.is_training
    self.logdir_train = self.flags.logdir_train
    self.logdir_test = self.flags.logdir_test
    self.pred_dir = self.flags.pred_dir
    self.save_path = self.flags.save_path
    self.model_name = self.flags.model_name
    self.train_data_loader = data_loader(self.flags.dataset_dir, self.flags.train_txt,
                                         self.img_height, self.img_width, self.batch_size)
    self.num_samples_per_epoch = self.train_data_loader.length()
    self.val_data_loader = data_loader(self.flags.dataset_dir, self.flags.val_txt,
                                       self.img_height, self.img_width, self.batch_size)
    self._build_graph()
    self._initialize_session()
    self._create_dirs()
Example #7
def train_epochs(resume=False, use_glove=True):
    """Train multiple opochs"""

    print('total epochs: ', cfg.EPOCHS, '; use_glove: ', use_glove)

    training_data, word_to_idx, label_to_idx = data_loader()
    model, best_acc, start_epoch = get_model(word_to_idx, label_to_idx, resume,
                                             use_glove)

    losses = []
    loss_function = nn.NLLLoss()
    if cfg.RUN_MODE == 'CNN':
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
        # optimizer = optim.SGD(model.parameters(), lr=0.1)
        # optimizer = optim.Adagrad(model.parameters(), lr=0.01, weight_decay=0.01)
    else:
        # optimizer = optim.Adam(model.parameters(), lr=0.001)
        optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.1)
    # optimizers below are not working
    # optimizer = optim.Adagrad(model.parameters(), lr=0.001)

    since = time.time()
    training_error_rates = []
    test_error_rates = []
    for epoch in range(1 + start_epoch, start_epoch + cfg.EPOCHS + 1):
        train_error, train_loss = train(model, loss_function, optimizer,
                                        training_data, word_to_idx)
        losses.append(train_loss)
        training_error_rates.append(train_error)
        test_error_rate = get_error_rate(model, training=False)
        test_error_rates.append(test_error_rate)
        acc = 1 - test_error_rate
        print('epoch: {}, time: {:.2f}s, cost so far: {}, accuracy: {:.3f}'.
              format(epoch, (time.time() - since), train_loss.numpy(), acc))
        if acc > best_acc:
            save_checkpoint(model, acc, epoch)
            best_acc = acc

    # save all_losses
    save_to_pickle('checkpoint/all_losses.p', losses)
    save_to_pickle('checkpoint/training_error_rates.p', training_error_rates)
    save_to_pickle('checkpoint/test_error_rates.p', test_error_rates)
def get_error_rate_with_two_classes(training=False):
    """
    Compute the overall error rate of the trained model.

    Combine positive and negative as one class, compared to neutral.
    If training is False, use test_data, otherwise training_data.
    """
    model = load_checkpoint()['model']

    data, word_to_idx, label_to_idx = data_loader(training)

    targets = torch.LongTensor()
    predicts = torch.LongTensor()
    for sentences, _targets, seq_lengths in get_batch_data(data,
                                                           cfg.BATCH_SIZE,
                                                           word_to_idx,
                                                           shuffle=True):

        _predicts = evaluate_batch(model, sentences, seq_lengths)
        targets = torch.cat((targets, _targets), 0)
        predicts = torch.cat((predicts, _predicts), 0)

    # combine positive and negative as one class
    targets[targets == 2] = 1
    predicts[predicts == 2] = 1

    error_rate = (targets != predicts).sum() / targets.size(0)

    print(Counter(targets.numpy()), Counter(predicts.numpy()))
    print('error rate: ', error_rate)
    idx2label = {i: v for v, i in label_to_idx.items()}
    labels = [idx2label[idx] for idx in sorted(idx2label.keys())]
    print(
        'Report:\n',
        metrics.classification_report(targets.numpy(),
                                      predicts.numpy(),
                                      target_names=labels))
    print('Confusion matrix: \n',
          metrics.confusion_matrix(targets.numpy(), predicts.numpy()))

    return error_rate
Example #9
def built_model():
    (train_processed, train_label), (test_processed,
                                     test_label) = data_loader(test_rate=0.01)
    model = built_attention_model()

    # model = multi_gpu_model(model, gpus=2)

    model.compile(loss=categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Train...')
    print(train_processed.shape)
    print(train_label.shape)
    print(test_processed.shape)
    print(test_label.shape)
    model.fit(train_processed,
              train_label,
              batch_size=256,
              epochs=18,
              validation_data=(test_processed, test_label))
    return model
def get_error_rate(model=None, training=False, report=False):
    """
    Compute the overall error rate of the trained model.

    If training is False, use test_data, otherwise training_data.
    If report is True, print precision, recall, F1-score, and confusion matrix.
    """
    model = model or load_checkpoint()['model']

    data, word_to_idx, label_to_idx = data_loader(training=training)

    targets = torch.LongTensor()
    predicts = torch.LongTensor()
    for sentences, _targets, seq_lengths in get_batch_data(data,
                                                           cfg.BATCH_SIZE,
                                                           word_to_idx,
                                                           shuffle=True):

        _predicts = evaluate_batch(model, sentences, seq_lengths)
        targets = torch.cat((targets, _targets), 0)
        predicts = torch.cat((predicts, _predicts), 0)

    error_rate = (targets != predicts).sum() / targets.size(0)

    if report:
        print('targets:', Counter(targets.numpy()), 'predicts:',
              Counter(predicts.numpy()))
        print('error rate: ', error_rate)
        idx2label = {i: v for v, i in label_to_idx.items()}
        labels = [idx2label[idx] for idx in sorted(idx2label.keys())]
        print(
            'Report:\n',
            metrics.classification_report(targets.numpy(),
                                          predicts.numpy(),
                                          target_names=labels))
        print('Confusion matrix: \n',
              metrics.confusion_matrix(targets.numpy(), predicts.numpy()))

    return error_rate
Example #11
def train_classifier(epochs, data_directories, model, learning_rate, gpu):

    train_loader = data_loader(image_datasets(data_directories[0]), 64, True)
    test_loader = data_loader(image_datasets(data_directories[1]), 64, False)

    # Check whether the user requested GPU and it is available
    if gpu == True and torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model.to(device)

    criterion = nn.NLLLoss()

    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    epochs = epochs
    steps = 0
    running_loss = 0
    print_every = 5

    for epoch in range(epochs):
        for inputs, labels in train_loader:
            steps += 1
            # Moving inputs and labels to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            output = model.forward(inputs)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            #             Testing our network accuracy and loss
            if steps % print_every == 0:
                # Turning our model into evaluation mode
                test_loss = 0
                accuracy = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in test_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        output = model.forward(inputs)
                        batch_loss = criterion(output, labels)
                        test_loss += batch_loss.item()

                        # ACCURACY (this accuracy computation follows the approach from earlier classes in the nanodegree)
                        ps = torch.exp(output)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(test_loader):.3f}.. "
                      f"Test accuracy: {accuracy/len(test_loader):.3f}")

                running_loss = 0
                model.train()

    return model
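
A minimal usage sketch for the train_classifier function above; the torchvision backbone, class count, directory names, and hyperparameters are all illustrative assumptions rather than part of the original project:

from torch import nn
from torchvision import models

# Hypothetical setup: a pretrained VGG16 whose final classifier layer is replaced so that
# it outputs log-probabilities (as expected by nn.NLLLoss) for an assumed 102-class dataset.
vgg = models.vgg16(pretrained=True)
vgg.classifier[6] = nn.Sequential(nn.Linear(4096, 102), nn.LogSoftmax(dim=1))

trained_model = train_classifier(epochs=5,
                                 data_directories=["flowers/train", "flowers/valid"],
                                 model=vgg,
                                 learning_rate=0.003,
                                 gpu=True)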
Example #12
def load_and_evaluate(generator, version):
    print("Initializing {} dataset".format(version))
    path = get_dset_path(DATASET_NAME, version)
    _, loader = data_loader(path)
    ade, fde = evaluate(loader, generator)
    print('{} Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(version, DATASET_NAME, PRED_LEN, ade, fde))
Example #13
def train(args):
    """
    Training
    """

    ##  ~~~~~~~~~~~~~~~~~~~
    ##   Initial settings
    ##  ~~~~~~~~~~~~~~~~~~~

    #   Input Variable
    nn.clear_parameters()  #   Clear
    Input = nn.Variable([args.batch_size, 3, 64, 64])  #   Input
    Trues = nn.Variable([args.batch_size, 1])  #   True Value

    #   Network Definition
    Name = "CNN"  #   Name of scope which includes network models (arbitrary)
    Output = network(Input, scope=Name)  #   Network & Output
    Output_test = network(Input, scope=Name, test=True)

    #   Loss Definition
    Loss = F.mean(F.absolute_error(Output,
                                   Trues))  #   Loss Function (Absolute Error)
    Loss_test = F.mean(F.absolute_error(Output_test, Trues))

    #   Solver Setting
    solver = S.AMSBound(args.learning_rate)  #   AMSBound (an Adam variant) is used as the solver
    with nn.parameter_scope(
            Name):  #   Get updating parameters included in scope
        solver.set_parameters(nn.get_parameters())

    #   Training Data Setting
    image_data, mos_data = dt.data_loader()
    batches = dt.create_batch(image_data, mos_data, args.batch_size)
    del image_data, mos_data

    #   Test Data Setting
    image_data, mos_data = dt.data_loader(test=True)
    batches_test = dt.create_batch(image_data, mos_data, args.batch_size)
    del image_data, mos_data

    ##  ~~~~~~~~~~~~~~~~~~~
    ##   Learning
    ##  ~~~~~~~~~~~~~~~~~~~
    print('== Start Training ==')

    bar = tqdm(total=args.epoch - args.retrain, leave=False)
    bar.clear()
    loss_disp = None
    SRCC = None

    #   Load parameters (when retraining)
    if args.retrain > 0:
        with nn.parameter_scope(Name):
            print('Retrain from {0} Epoch'.format(args.retrain))
            nn.load_parameters(
                os.path.join(args.model_save_path,
                             "network_param_{:04}.h5".format(args.retrain)))
            solver.set_learning_rate(args.learning_rate /
                                     np.sqrt(args.retrain))

    ##  Training
    for i in range(args.retrain, args.epoch):

        bar.set_description_str('Epoch {0}:'.format(i + 1), refresh=False)
        if (loss_disp is not None) and (SRCC is not None):
            bar.set_postfix_str('Loss={0:.5f},  SRCC={1:.4f}'.format(
                loss_disp, SRCC),
                                refresh=False)
        bar.update(1)

        #   Shuffling
        batches.shuffle()
        batches_test.shuffle()

        ##  Batch iteration
        for j in range(batches.iter_n):

            #  Load Batch Data from Training data
            Input.d, Trues.d = next(batches)

            #  Update
            solver.zero_grad()  #   Initialize
            Loss.forward(clear_no_need_grad=True)  #   Forward path
            Loss.backward(clear_buffer=True)  #   Backward path
            solver.weight_decay(0.00001)  #   Weight Decay for stable update
            solver.update()

        ## Progress
        # Get result for Display
        Input.d, Trues.d = next(batches_test)
        Loss_test.forward(clear_no_need_grad=True)
        Output_test.forward()
        loss_disp = Loss_test.d
        SRCC, _ = stats.spearmanr(Output_test.d, Trues.d)

        # Display text
        # disp(i, batches.iter_n, Loss_test.d)

        ## Save parameters
        if ((i + 1) % args.model_save_cycle) == 0 or (i + 1) == args.epoch:
            bar.clear()
            with nn.parameter_scope(Name):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 'network_param_{:04}.h5'.format(i + 1)))
Example #14
File: main.py  Project: jays0606/EEE4610
import glob

root = "./-334_897/*"
folder = sorted(glob.glob(root))

for path in folder:
    image_size = int(path.split("/")[2])
    image_path = "./-334_897/" + str(image_size) + "/Image"
    mask_path = "./-334_897/" + str(image_size) + "/Mask"

    train_set, val_set, test_set = load_dataset(img_path=image_path,
                                                mask_path=mask_path,
                                                img_size=image_size,
                                                test_num=2000,
                                                load=False)
    train_loader = data_loader(train_set, 16)
    val_loader = data_loader(val_set, 16)

    net = UnetGenerator(in_dim=1, out_dim=1, num_filter=16)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)

    test = train(net=net,
                 image_size=image_size,
                 train_loader=train_loader,
                 val_loader=val_loader)
    test.run(learning_rate=0.001,
             patience=10,
             mini_batch=50,
             epochs=50)

def train(args):

    ##  Sub-functions
    ## ---------------------------------
    ## Save Models
    def save_models(epoch_num, losses):

        # save generator parameter
        with nn.parameter_scope('Wave-U-Net'):
            nn.save_parameters(
                os.path.join(args.model_save_path,
                             'param_{:04}.h5'.format(epoch_num + 1)))

        # save results
        np.save(
            os.path.join(args.model_save_path,
                         'losses_{:04}.npy'.format(epoch_num + 1)),
            np.array(losses))

    ## Load Models
    def load_models(epoch_num, gen=True, dis=True):

        # load generator parameter
        with nn.parameter_scope('Wave-U-Net'):
            nn.load_parameters(
                os.path.join(args.model_save_path,
                             'param_{:04}.h5'.format(args.epoch_from)))

    ## Update parameters
    class updating:
        def __init__(self):
            self.scale = 8 if args.halfprec else 1

        def __call__(self, solver, loss):
            solver.zero_grad()  # initialize
            loss.forward(clear_no_need_grad=True)  # calculate forward
            loss.backward(self.scale, clear_buffer=True)  # calculate backward
            #solver.scale_grad(1. / self.scale)                # scaling
            solver.update()  # update

    ##  Initial Settings
    ## ---------------------------------

    ##  Create network
    #   Clear
    nn.clear_parameters()
    #   Variables
    noisy = nn.Variable([args.batch_size, 1, 16384], need_grad=False)  # Input
    clean = nn.Variable([args.batch_size, 1, 16384], need_grad=False)  # Desire

    # Build Network
    # K=2, C=1
    target_1, target_2 = Wave_U_Net(noisy)

    # Mean Squared Error
    loss = (F.mean(F.squared_error(clean, target_1)) +
            F.mean(F.squared_error(noisy - clean, target_2))) / 2.

    # Optimizer: Adam
    solver = S.Adam(args.learning_rate)

    # set parameter
    with nn.parameter_scope('Wave-U-Net'):
        solver.set_parameters(nn.get_parameters())

    ##  Load data & Create batch
    clean_data, noisy_data = dt.data_loader()
    batches = dt.create_batch(clean_data, noisy_data, args.batch_size)
    del clean_data, noisy_data

    ##  Initial settings for sub-functions
    fig = figout()
    disp = display(args.epoch_from, args.epoch, batches.batch_num)
    upd = updating()

    ##  Train
    ##----------------------------------------------------

    print('== Start Training ==')

    ##  Load "Pre-trained" parameters
    if args.epoch_from > 0:
        print(' Retrain parameter from pre-trained network')
        load_models(args.epoch_from)
        losses = np.load(
            os.path.join(args.model_save_path,
                         'losses_{:04}.npy'.format(args.epoch_from)))

        ## Create loss loggers
        point = args.epoch_from * ((batches.batch_num + 1) // 10)
        loss_len = (args.epoch - args.epoch_from) * (
            (batches.batch_num + 1) // 10)
        losses = np.append(losses, np.zeros(loss_len))
    else:
        losses = []
        ## Create loss loggers
        point = len(losses)
        loss_len = (args.epoch - args.epoch_from) * (
            (batches.batch_num + 1) // 10)
        losses = np.append(losses, np.zeros(loss_len))

    ##  Training
    for i in range(args.epoch_from, args.epoch):

        print('')
        print(' =========================================================')
        print('  Epoch :: {0}/{1}'.format(i + 1, args.epoch))
        print(' =========================================================')
        print('')

        batches.shuffle()

        #  Batch iteration
        for j in range(batches.batch_num):
            print('  Train (Epoch. {0}) - {1}/{2}'.format(
                i + 1, j + 1, batches.batch_num))

            ##  Batch setting
            clean.d, noisy.d = batches.next(j)

            ##  Updating
            upd(solver, loss)  # update Generator

            ##  Display
            if (j) % 100 == 0:
                # Get result for Display
                target_1.forward(clear_no_need_grad=True)
                target_2.forward(clear_no_need_grad=True)

                # Display text
                disp(i, j, loss.d)

                # Data logger
                losses[point] = loss.d
                point = point + 1

                # Plot
                fig.waveform_1(noisy.d[0, 0, :], target_1.d[0, 0, :],
                               clean.d[0, 0, :])
                fig.waveform_2(noisy.d[0, 0, :], target_2.d[0, 0, :],
                               clean.d[0, 0, :])
                fig.loss(losses[0:point - 1])
                pg.QtGui.QApplication.processEvents()

        ## Save parameters
        if ((i + 1) % args.model_save_cycle) == 0:
            save_models(i, losses)  # save model
            # fig.save(os.path.join(args.model_save_path, 'plot_{:04}.pdf'.format(i + 1))) # save fig
            exporter = pg.exporters.ImageExporter(
                fig.win.scene())  # Call pg.QtGui.QApplication.processEvents() right before using exporters!
            exporter.export(
                os.path.join(args.model_save_path,
                             'plot_{:04}.png'.format(i + 1)))  # save fig

    ## Save parameters (Last)
    save_models(args.epoch - 1, losses)
    exporter = pg.exporters.ImageExporter(
        fig.win.scene())  # Call pg.QtGui.QApplication.processEvents() right before using exporters!
    exporter.export(
        os.path.join(args.model_save_path,
                     'plot_{:04}.png'.format(i + 1)))  # save fig
Example #16
                      f'prec5: {top5.val:.3f} ({top5.avg:.3f}) '
                      f'Loss: {val_loss.val:.4f} ({val_loss.avg:.4f})')

    return val_loss.avg, top1.avg


if __name__ == '__main__':
    args = arg_parser()

    best_acc = 0

    # initialize checkpoint directory
    initialize_dir("./checkpoint")

    # data loader
    train_loader, val_loader = data_loader(args)

    # model + loss
    model, model_linear = load_model(args)
    criterion = load_loss(args)

    # optimizer + lr scheduler
    optimizer = load_optimizer(args, model_linear)
    lr_scheduler = get_lr_scheduler(args, optimizer)

    # tensorboardX
    os.makedirs(args.log_dir, exist_ok=True)
    summary_writer = SummaryWriter(args.log_dir)

    end = time.time()
Example #17
def main():
    train_loader, _, test_loader = data.data_loader(data=args.data,
                                                    batch_size=args.batch_size)

    if len(args.gpu_id) == 1:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
        if args.model == 'resnet18':
            model = preact_resnet.resnet18_cifar(
                num_classes=args.num_classes).cuda()
        elif args.model == 'preactResnet110':
            model = preact_resnet.preact_resnet110_cifar(
                num_classes=args.num_classes).cuda()
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
        if args.model == 'resnet18':
            model = nn.DataParallel(
                preact_resnet.resnet18_cifar(
                    num_classes=args.num_classes).cuda(),
                device_ids=[
                    idx for idx in range((len(args.gpu_id) + 1) // 2)
                ])
        elif args.model == 'preactResnet110':
            model = nn.DataParallel(
                preact_resnet.preact_resnet110_cifar(
                    num_classes=args.num_classes).cuda(),
                device_ids=[
                    idx for idx in range((len(args.gpu_id) + 1) // 2)
                ])

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=0.0005)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                               milestones=[160],
                                               gamma=0.1)
    if args.individual_loss == True:
        criterion = nn.CrossEntropyLoss(reduce=False).cuda()
        loss_criterion = nn.MarginRankingLoss(margin=1.0, reduce=False).cuda()
    elif args.individual_loss == False:
        criterion = nn.CrossEntropyLoss().cuda()

    train_accuracy_li = []
    train_loss_li = []
    train_ranking_loss_li = []
    valid_accuracy_li = []
    valid_loss_li = []
    valid_ranking_loss_li = []
    for epoch in range(1, args.epoch + 1):
        if args.scheduler == True:
            scheduler.step()

        print('Epoch {0}/{1}'.format(epoch, args.epoch))
        ############################                   TRAIN                     ##############################
        train_accuracy, train_loss, train_ranking_loss, train_probability, train_correctness, train_individual_loss, train_pred_output = train(
            train_loader,
            model,
            optimizer,
            criterion,
            ranking_criterion=loss_criterion,
            epoch=epoch)
        print('accuracy: {0}, loss: {1}, ranking loss: {2}'.format(
            train_accuracy, train_loss, train_ranking_loss))

        train_accuracy_li.append(train_accuracy)
        train_loss_li.append(train_loss)
        train_ranking_loss_li.append(train_ranking_loss)

        utils.save_log_csv('{}-{}-sgd-train'.format(args.model, args.data),
                           train_accuracy_li, train_loss_li,
                           train_ranking_loss_li)
        utils.save_prob_csv('{}-{}-sgd-train'.format(args.model, args.data),
                            epoch, train_correctness, train_probability)
        utils.save_loss_csv('{}-{}-sgd-train'.format(args.model, args.data),
                            epoch, train_individual_loss, train_pred_output)

        ############################                 VALIDATION                   ##############################
        valid_accuracy, valid_loss, valid_ranking_loss, valid_probability, valid_correctness, valid_individual_loss, valid_pred_output = validate(
            test_loader,
            model,
            criterion,
            ranking_criterion=loss_criterion,
            epoch=epoch)
        print('accuracy: {0}, loss: {1}, ranking loss: {2}'.format(
            valid_accuracy, valid_loss, valid_ranking_loss))

        valid_accuracy_li.append(valid_accuracy)
        valid_loss_li.append(valid_loss)
        valid_ranking_loss_li.append(valid_ranking_loss)

        utils.save_log_csv('{}-{}-sgd-test'.format(args.model, args.data),
                           valid_accuracy_li, valid_loss_li,
                           valid_ranking_loss_li)
        utils.save_prob_csv('{}-{}-sgd-test'.format(args.model, args.data),
                            epoch, valid_correctness, valid_probability)
        utils.save_loss_csv('{}-{}-sgd-test'.format(args.model, args.data),
                            epoch, valid_individual_loss, valid_pred_output)
Example #18
from data import data_loader

if __name__ == "__main__":
    loader = data_loader(2048, 64, 64, 1000, 5000)
    loader.save_npy()
    loader.save_embedding_matrix(200)
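
For reference, the positional call above presumably corresponds to the keyword form used in Example #25; this mapping is an assumption inferred from the other snippets, not stated in the original:

loader = data_loader(features_shape=2048,
                     attention_features_shape=64,
                     batch_size=64,
                     buffer_size=1000,
                     top_k=5000)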



Example #19
from collections import Counter

from data import data_loader
from util import load_pickle

data, word_to_idx, label_to_idx = data_loader()
print(len(data))

data, word_to_idx, label_to_idx = data_loader(balance_skew=False)
print(len(data))

print(data[:2])
print(len(word_to_idx))

# print word by length in reverse order
# print('\n'.join(sorted(word_to_idx.keys(), key=len, reverse=True)[:100]))

# path_to_glove = 'GloVe-1.2/vectors.txt'
# with open(path_to_glove, 'r') as f:
#     glove_words = set((line.split()[0] for line in f.readlines()))
#     our_words = set(word_to_idx.keys())
#     print(len(glove_words.intersection(our_words)))
#     # get word that is in word_to_idx but not in Glove
#     print(len(our_words.difference(glove_words)))
#     print(our_words.difference(glove_words))
Example #20
def train(args):

    ##  Sub-functions
    ## ---------------------------------
    ## Save Models
    def save_models(epoch_num, cle_disout, fake_disout, losses_gen, losses_dis, losses_ae):

        # save generator parameter
        with nn.parameter_scope("gen"):
            nn.save_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(epoch_num + 1)))

        # save discriminator parameter
        with nn.parameter_scope("dis"):
            nn.save_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(epoch_num + 1)))

        # save results
        np.save(os.path.join(args.model_save_path, 'disout_his_{:04}.npy'.format(epoch_num + 1)), np.array([cle_disout, fake_disout]))
        np.save(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(epoch_num + 1)), np.array(losses_gen))
        np.save(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(epoch_num + 1)), np.array(losses_dis))
        np.save(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(epoch_num + 1)), np.array(losses_ae))

    ## Load Models
    def load_models(epoch_num, gen=True, dis=True):

        # load generator parameter
        with nn.parameter_scope("gen"):
            nn.load_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(args.epoch_from)))

        # load discriminator parameter
        with nn.parameter_scope("dis"):
            nn.load_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(args.epoch_from)))

    ## Update parameters
    class updating:

        def __init__(self):
            self.scale = 8 if args.halfprec else 1

        def __call__(self, solver, loss):
            solver.zero_grad()                                  # initialize
            loss.forward(clear_no_need_grad=True)               # calculate forward
            loss.backward(self.scale, clear_buffer=True)      # calculate backward
            solver.scale_grad(1. / self.scale)                # scaling
            solver.weight_decay(args.weight_decay * self.scale) # decay
            solver.update()                                     # update


    ##  Initial Settings
    ## ---------------------------------

    ##  Create network
    #   Clear
    nn.clear_parameters()
    #   Variables
    noisy = nn.Variable([args.batch_size, 1, 16384], need_grad=False)   # Input
    clean = nn.Variable([args.batch_size, 1, 16384], need_grad=False)   # Desire
    z = nn.Variable([args.batch_size, 1024, 8], need_grad=False)        # Random Latent Variable
    #   Generator
    genout = Generator(noisy, z)              # Predicted Clean
    genout.persistent = True                  # Not to clear at backward
    loss_gen = Loss_gen(genout, clean, Discriminator(noisy, genout))
    loss_ae = F.mean(F.absolute_error(genout, clean))
    #   Discriminator
    fake_dis = genout.get_unlinked_variable(need_grad=True)
    cle_disout = Discriminator(noisy, clean)
    fake_disout = Discriminator(noisy, fake_dis)
    loss_dis = Loss_dis(Discriminator(noisy, clean), Discriminator(noisy, fake_dis))

    ##  Solver
    # RMSprop.
    # solver_gen = S.RMSprop(args.learning_rate_gen)
    # solver_dis = S.RMSprop(args.learning_rate_dis)
    # Adam
    solver_gen = S.Adam(args.learning_rate_gen)
    solver_dis = S.Adam(args.learning_rate_dis)
    # set parameter
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    ##  Load data & Create batch
    clean_data, noisy_data = dt.data_loader()
    batches     = dt.create_batch(clean_data, noisy_data, args.batch_size)
    del clean_data, noisy_data

    ##  Initial settings for sub-functions
    fig     = figout()
    disp    = display(args.epoch_from, args.epoch, batches.batch_num)
    upd     = updating()

    ##  Train
    ##----------------------------------------------------

    print('== Start Training ==')

    ##  Load "Pre-trained" parameters
    if args.epoch_from > 0:
        print(' Retrain parameter from pre-trained network')
        load_models(args.epoch_from, dis=False)
        losses_gen  = np.load(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(args.epoch_from)))
        losses_dis  = np.load(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(args.epoch_from)))
        losses_ae   = np.load(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(args.epoch_from)))
    else:
        losses_gen  = []
        losses_ae   = []
        losses_dis  = []

    ## Create loss loggers
    point       = len(losses_gen)
    loss_len    = (args.epoch - args.epoch_from) * ((batches.batch_num+1)//10)
    losses_gen  = np.append(losses_gen, np.zeros(loss_len))
    losses_ae   = np.append(losses_ae, np.zeros(loss_len))
    losses_dis  = np.append(losses_dis, np.zeros(loss_len))

    ##  Training
    for i in range(args.epoch_from, args.epoch):

        print('')
        print(' =========================================================')
        print('  Epoch :: {0}/{1}'.format(i + 1, args.epoch))
        print(' =========================================================')
        print('')

        #  Batch iteration
        for j in range(batches.batch_num):
            print('  Train (Epoch. {0}) - {1}/{2}'.format(i+1, j+1, batches.batch_num))

            ##  Batch setting
            clean.d, noisy.d = batches.next(j)
            #z.d = np.random.randn(*z.shape)
            z.d = np.zeros(z.shape)

            ##  Updating
            upd(solver_gen, loss_gen)       # update Generator
            upd(solver_dis, loss_dis)       # update Discriminator

            ##  Display
            if (j+1) % 10 == 0:
                # Get result for Display
                cle_disout.forward()
                fake_disout.forward()
                loss_ae.forward(clear_no_need_grad=True)

                # Display text
                disp(i, j, loss_gen.d, loss_dis.d, loss_ae.d)

                # Data logger
                losses_gen[point] = loss_gen.d
                losses_ae[point]  = loss_ae.d
                losses_dis[point] = loss_dis.d
                point = point + 1

                # Plot
                fig.waveform(noisy.d[0,0,:], genout.d[0,0,:], clean.d[0,0,:])
                fig.loss(losses_gen[0:point-1], losses_ae[0:point-1], losses_dis[0:point-1])
                fig.histogram(cle_disout.d, fake_disout.d)
                pg.QtGui.QApplication.processEvents()


        ## Save parameters
        if ((i+1) % args.model_save_cycle) == 0:
            save_models(i, cle_disout.d, fake_disout.d, losses_gen[0:point-1], losses_dis[0:point-1], losses_ae[0:point-1])  # save model
            exporter = pg.exporters.ImageExporter(fig.win.scene())  # Call pg.QtGui.QApplication.processEvents() before exporters!!
            exporter.export(os.path.join(args.model_save_path, 'plot_{:04}.png'.format(i + 1))) # save fig

    ## Save parameters (Last)
    save_models(args.epoch-1, cle_disout.d, fake_disout.d, losses_gen, losses_dis, losses_ae)
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description="Measuring Privacy and Fairness Trade-offs")
    parser.add_argument(
        "-rn",
        "--run-name",
        required=True,
        type=str,
        help="Define run name for logging",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=128,
        metavar="B",
        help="Input batch size for training (default: 128)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=4119,
        metavar="TB",
        help="Input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=20,
        metavar="N",
        help="Number of epochs to train (default: 20)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        help="Number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="Learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=list,
        #default=[3.0, 0.6],
        default=[
            0, 3.0, 2.85, 2.6, 2.45, 2.3, 2.15, 2.0, 1.85, 1.6, 1.45, 1.3,
            1.15, 1.0, 0.85, 0.6, 0.45, 0.3, 0.15
        ],
        metavar="S",
        help="Noise multiplier (default [0, 0.1, 0.5, 1.0])",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )

    parser.add_argument(
        "--dataset",
        type=str,
        #default="bank",
        required=True,
        help=
        "Specify the dataset you want to test on. (bank: bank marketing, adult: adult census)",
    )
    parser.add_argument(
        "--train-data-path",
        type=str,
        default="./bank-data/bank-additional-full.csv",
        help="Path to train data",
    )
    parser.add_argument(
        "--test-data-path",
        type=str,
        default="./bank-data/bank-additional.csv",
        help="Path to test data",
    )
    parser.add_argument(
        "--num-teachers",
        type=int,
        default=0,
        help="Number of PATE teacher (default=3)",
    )
    parser.add_argument(
        "--sensitive",
        type=str,
        required=True,
        help="Name of sensitive column",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    #    for i in range(args.n_runs):
    for i, s in enumerate(args.sigma):
        if args.num_teachers == 0 or s == 0:
            dataset = data_loader(args, s)
            train_data, test_data = dataset.__getitem__()
            cat_emb_size, num_conts = dataset.get_input_properties()
            train_size, test_size = dataset.__len__()
            sensitive_cat_keys = dataset.getkeys()
            sensitive_idx = dataset.get_sensitive_idx()
            print(sensitive_cat_keys)
        else:
            dataset = data_loader(args, s)
            train_size, test_size = dataset.__len__()
            teacher_loaders = dataset.train_teachers()
            student_train_loader, student_test_loader = dataset.student_data()
            cat_emb_size, num_conts = dataset.get_input_properties()
            sensitive_cat_keys = dataset.getkeys()
            sensitive_idx = dataset.get_sensitive_idx()
            print(sensitive_cat_keys)

            print("!!!!!! DATA LOADED")

        #run_results = []

        wandb.init(project="project3",
                   name=args.run_name,
                   config={
                       "run_name": args.run_name,
                       "architecture": 'RegressionModel',
                       "dataset": args.dataset,
                       "batch_size": args.batch_size,
                       "n_epoch": args.epochs,
                       "learning_rate": args.lr,
                       "sigma(noise)": s,
                       "disable_dp": args.disable_dp,
                   })
        config = wandb.config

        model = RegressionModel(emb_szs=cat_emb_size,
                                n_cont=num_conts,
                                emb_drop=0.04,
                                out_sz=1,
                                szs=[1000, 500, 250],
                                drops=[0.001, 0.01, 0.01],
                                y_range=(0, 1)).to(device)

        for layer in model.children():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)

        if not args.disable_dp:
            if s > 0:
                privacy_engine = PrivacyEngine(
                    model,
                    batch_size=args.batch_size,
                    sample_size=train_size,
                    alphas=[1 + x / 10.0
                            for x in range(1, 100)] + list(range(12, 64)),
                    noise_multiplier=s,
                    max_grad_norm=args.max_per_sample_grad_norm,
                    secure_rng=False,
                )
                privacy_engine.attach(optimizer)

        if args.num_teachers == 0 or s == 0:
            if i == 0:  # print model properties
                print(model, '\n')

            print(
                "\n=== RUN # {} ====================================\n".format(
                    i))

            for epoch in range(1, args.epochs + 1):
                train(args, model, device, train_data, criterion, optimizer,
                      epoch, s)
            """

            batch = next(iter(train_data))
            cats, conts, _ = batch

            test_batch = next(iter(test_data))
            test_cats, test_conts, _ = test_batch

            explainer = shap.KernelExplainer(model, [cats.numpy(), conts.numpy()])
            print(explainer)
            shap_values = explainer.shap_values(cats.numpy())
            shap.plots.bar(shap_values)
            exit()
            
            """
            accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results = test(
                args, model, device, test_data, test_size, sensitive_idx)
        else:  # PATE MODEL
            print("!!!!!! ENTERED HERE")
            #model_rf = RandomForestClassifier(random_state=42, warm_start=True)

            teacher_models = train_models(args, model, teacher_loaders,
                                          criterion, optimizer, device)
            preds, student_labels = aggregated_teacher(teacher_models,
                                                       student_train_loader, s,
                                                       device)

            accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results = test_student(
                args, student_train_loader, student_labels,
                student_test_loader, test_size, cat_emb_size, num_conts,
                device, sensitive_idx)
            """
            data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=student_labels,
                                                               noise_eps=s, delta=1e-5)
            print("Data Independent Epsilon:", data_ind_eps)
            print("Data Dependent Epsilon:", data_dep_eps)
            """

        #t = [accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results]
        #[print(type(i)) for i in t]

        #print("\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n".format(avg_loss,accuracy))
        result = """
===================
Test set: {}

accuracy: {:.4f}
average loss: {:.4f}
precision: {:.4f}
recall: {:.4f}
sub_pre_rec:
{}
cm:
{}
sub_cm:
{}
avg_eq_odds: {:.4f}
avg_tpr: {:.4f}
avg_dem_par: {:.4f}
""".format(args.run_name, accuracy, avg_loss, avg_precision, avg_recall,
           overall_results, cm, sub_cm, avg_eq_odds, avg_tpr, avg_dem_par)

        # append run result
        file_path = 'out//all_results.' + args.run_name
        file_object = open(file_path, 'a+')
        file_object.write(result)
        file_object.close()
        print(result)
        log_dict = {
            "accuracy": accuracy,
            "avg_loss": avg_loss,
            "precision": avg_precision,
            "recall": avg_recall,
            "avg_eq_odds": avg_eq_odds,
            "avg_tpr": avg_tpr,
            "avg_dem_par": avg_dem_par,
            "tn": cm[0],
            "fp": cm[1],
            "fn": cm[2],
            "tp": cm[3]
        }
        """
        for j in avg_recall_by_group.keys():
            category = sensitive_cat_keys[j]
            value = avg_recall_by_group[j]
            log_dict[category] = value
        """
        print(log_dict)
        wandb.log(log_dict)
Example #22
from data import data_loader
from option import option
import torch
opt = option()

data = data_loader(opt.dataset, 'resize', batchsize=8)
for i, batch in enumerate(data):
    print(batch)
def test(args):

    ##  Load data & Create batch
    clean_data, noisy_data, length_data = dt.data_loader(test=True,
                                                         need_length=True)
    print(clean_data.shape)
    # Batch
    #  - The processed speech interval can be adjusted via "start_frame" and "stop_frame".
    #  - "None" -> All speech in the test dataset.

    output_ts = []
    bt_idx = 0
    test_batch_size = args.batch_size
    for i in range(clean_data.shape[0] // (test_batch_size * 2)):
        print(i, "/", clean_data.shape[0] // (test_batch_size * 2))
        batches_test = dt.create_batch_test(
            clean_data[bt_idx:bt_idx + test_batch_size * 2],
            noisy_data[bt_idx:bt_idx + test_batch_size * 2],
            start_frame=0,
            stop_frame=test_batch_size * 2)

        ##  Create network
        # Variables
        noisy_t = nn.Variable(batches_test.noisy.shape)  # Input
        # Network (Only Generator)
        output_t, _ = Wave_U_Net(noisy_t)
        ##  Load parameter
        # load generator
        with nn.parameter_scope('Wave-U-Net'):
            nn.load_parameters(
                os.path.join(args.model_save_path,
                             "param_{:04}.h5".format(args.epoch)))
        ##  Validation
        noisy_t.d = batches_test.noisy
        output_t.forward()

        ##  Create wav files
        output = output_t.d.flatten()
        output_ts.append(output)
        bt_idx += (test_batch_size * 2)

    if (clean_data.shape[0] % (test_batch_size * 2)) != 0:
        last_batch_size_2 = clean_data.shape[0] % (test_batch_size * 2)
        print(last_batch_size_2)
        batches_test = dt.create_batch_test(
            clean_data[bt_idx:bt_idx + last_batch_size_2],
            noisy_data[bt_idx:bt_idx + last_batch_size_2],
            start_frame=0,
            stop_frame=last_batch_size_2)

        ##  Create network
        # Variables
        noisy_t = nn.Variable(batches_test.noisy.shape)  # Input
        # Network (Only Generator)
        ##  Load parameter
        output_t, _ = Wave_U_Net(noisy_t)
        # load generator
        with nn.parameter_scope('Wave-U-Net'):
            nn.load_parameters(
                os.path.join(args.model_save_path,
                             "param_{:04}.h5".format(args.epoch)))
        ##  Validation
        noisy_t.d = batches_test.noisy
        output_t.forward()

        ##  Create wav files
        output = output_t.d.flatten()
        output_ts.append(output)
        bt_idx += (last_batch_size_2)

    output = output_ts[0]
    for i in range(1, len(output_ts)):
        output = np.concatenate([output, output_ts[i]], axis=0)
    print(len(output))
    output = np.array(output)
    print(output.shape)
    idx_cnt = 0
    for i in range(len(length_data['name'])):
        print("saving", i, length_data['name'][i], "...")
        outwav = output[idx_cnt:idx_cnt + length_data['length'][i]]
        print(outwav.shape)
        idx_cnt += length_data['length'][i]
        print(idx_cnt)
        dt.wav_write((args.wav_save_path + '/' + 'ests_' +
                      os.path.basename(length_data['name'][i])),
                     np.array(outwav),
                     fs=16000)
    print('finish!')
Example #24
import numpy as np
import tensorflow as tf
from data import modelCNN, preprocess_img_path, data_loader
from models import Encoder, Decoder
from PIL import Image
from nltk.translate.bleu_score import corpus_bleu
import matplotlib.pyplot as plt
from train import validate
import math

if __name__ == "__main__":

    loader = data_loader(features_shape=2048,
                         attention_features_shape=64,
                         batch_size=256,
                         buffer_size=1000,
                         top_k=5000)

    dataset_train = loader.load_dataset("train")
    dataset_test = loader.load_dataset("test")
    tokenizer = loader.tokenizer

    # loading model
    embedding_matrix = np.load(
        "./content/drive/My Drive/datasets/embeddingmatrix.npy")
    encoder = Encoder(encoder_dim=200)
    decoder = Decoder(embedding_dim=200,
                      vocab_size=loader.top_k + 1,
                      units=512,
                      embedding_matrix=embedding_matrix)
    optimizer = tf.keras.optimizers.Adam()
Example #25
def run(EPOCHS):

    ## Load data and init
    loader = data_loader(
        features_shape=2048,
        attention_features_shape=64,
        batch_size=64,
        buffer_size=1000,
        top_k=5000,
    )
    dataset_train = loader.load_dataset("train")
    dataset_test = loader.load_dataset("test")
    tokenizer = loader.tokenizer

    embedding_dim = 200
    encoder_dim = embedding_dim
    units = 512
    vocab_size = loader.top_k + 1
    num_steps = len(loader.train_path) // loader.batch_size

    ## Load model
    embedding_matrix = np.load(
        "/content/drive/My Drive/datasets/embeddingmatrix.npy")
    encoder = Encoder(encoder_dim)
    decoder = Decoder(embedding_dim, vocab_size, units, embedding_matrix)
    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction="none")
    ## Load checkpoint
    checkpoint_path = "/content/drive/My Drive/datasets/modelcheckpoint/embedding"
    ckpt = tf.train.Checkpoint(encoder=encoder,
                               decoder=decoder,
                               optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_path,
                                              max_to_keep=3)
    start_epoch = 0
    if ckpt_manager.latest_checkpoint:
        start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1])
        ckpt.restore(ckpt_manager.latest_checkpoint)

    # Running
    BLEU_1, BLEU_2, BLEU_3, BLEU_4 = [], [], [], []
    loss_plot = []
    for epoch in range(start_epoch, EPOCHS):
        start = time()
        total_loss = 0
        print("---Training---Epoch {}".format(epoch))
        for (batch, (img_tensor, target)) in enumerate(dataset_train):
            batch_loss, t_loss = train_step(encoder, decoder, optimizer,
                                            tokenizer, loss_object, img_tensor,
                                            target)
            total_loss += t_loss

        loss_plot.append(total_loss / num_steps)

        if epoch % 10 == 0:
            ckpt_manager.save()
            print("---Testing---Epoch {}".format(epoch))
            bleu_1, bleu_2, bleu_3, bleu_4 = 0, 0, 0, 0
            for (batch, (img_tensor, target)) in enumerate(dataset_test):
                hypotheses, references = validate(encoder, decoder, optimizer,
                                                  tokenizer, img_tensor,
                                                  target)
                bleu_1 += corpus_bleu(references,
                                      hypotheses,
                                      weights=(1, 0, 0, 0))
                bleu_2 += corpus_bleu(references,
                                      hypotheses,
                                      weights=(0.5, 0.5, 0, 0))
                bleu_3 += corpus_bleu(references,
                                      hypotheses,
                                      weights=(0.33, 0.33, 0.33, 0))
                bleu_4 += corpus_bleu(references,
                                      hypotheses,
                                      weights=(0.25, 0.25, 0.25, 0.25))
                if batch == 5:
                    break
            bleu_1, bleu_2, bleu_3, bleu_4 = (
                bleu_1 / (batch + 1),
                bleu_2 / (batch + 1),
                bleu_3 / (batch + 1),
                bleu_4 / (batch + 1),
            )
            print("Bleu_1: {}".format(bleu_1))
            print("Bleu_2: {}".format(bleu_2))
            print("Bleu_3: {}".format(bleu_3))
            print("Bleu_4: {}".format(bleu_4))
            BLEU_1.append(bleu_1)
            BLEU_2.append(bleu_2)
            BLEU_3.append(bleu_3)
            BLEU_4.append(bleu_4)
            print("Epoch {} Loss {:.6f}".format(epoch, total_loss / num_steps))
            print("Time taken for 1 epoch {} sec\n".format(time() - start))

    return BLEU_1, BLEU_2, BLEU_3, BLEU_4, loss_plot
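
train_step is defined elsewhere and not shown; the reduction="none" loss_object above suggests it applies a padding mask before averaging. A minimal sketch of such a masked loss, assuming the padding index is 0:

import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction="none")

def loss_function(real, pred):
    # Zero out the loss wherever the target token is the padding index 0,
    # then average over all positions.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)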
示例#26
0
    if args.model_path != '':
        if os.path.isfile(args.model_path):
            print('loading model parameters from {}'.format(args.model_path))
            model.load_state_dict(torch.load(args.model_path))
        else:
            print('no model found at {}'.format(args.model_path))

    # Define loss function, optimizer, and a scheduler to adjust the lr
    criterion = nn.CrossEntropyLoss().to(args.device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.step_size,
                                          gamma=args.gamma)

    best_result = {'epoch': 1, 'accuracy': 0.}
    # Load train and val dataset for training
    train_loader = data_loader(args, train=True, val=True)
    for epoch in range(args.begin_epoch, args.epochs + 1):
        adjust_learning_rate(scheduler)
        train_accuracy, train_loss = train_model(args, epoch, model,
                                                 train_loader, criterion,
                                                 optimizer, scheduler, None)

        print('=> Saving model...\n')
        file_path = os.path.join(args.result_path,
                                 'save_{}_{}.pth'.format(args.model, epoch))
        torch.save(model.state_dict(), file_path)
示例#27
0
def run(param, args):
    source = os.path.basename(args.source).split('.')[0]
    target = os.path.basename(args.target).split('.')[0]
    flag = args.testType != 'trainNum'
    test_num = 10

    if flag:
        # Load source data (target data is loaded per test run below)
        param["source_data"], param["source_label"] = data.data_loader(args.source, param["inp_dims"], sample_num=25)
        # Encode labels into one-hot format
        clsNum = len(set(param["source_label"]))
        param["source_label"] = data.one_hot_encoding(param["source_label"], clsNum)
    else:
        print('will run the train-num test, so source data is not loaded up front')

    if 'nShot' == args.testType:
        print('run n_shot test...')
        n_shot_list = [1, 5, 10, 15, 20]
        #n_shot_list = [20]
        outfile = os.path.join(ResDir, 'ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        f = open(outfile, 'a+')
        print('\n\n##################### test time is: {}####################'.format(time.ctime()), file=f, flush=True)
        for n_shot in n_shot_list:
            acc_list = []
            time_last_list = []
            for i in range(test_num):
                # Train phase
                signature_dict, test_dict, sites = utility.getDataDict(args.target, n_shot=n_shot, data_dim=param['inp_dims'], train_pool_size=20, test_size=70)
                target_data, target_label = mytools.datadict2data(signature_dict)
                print('target data shape: ', target_data.shape)
                target_data = target_data[:, :, np.newaxis]
                target_label = data.one_hot_encoding(target_label, len(set(target_label)))
                param["target_data"], param["target_label"] = target_data, target_label
                model_path, time_last = train(param, args)
                time_last_list.append(time_last)
                print('training time last: ', time_last)

                # Test phase
                test_opts = test.MyOpts(model_path, nShot=n_shot, tuning=True, aug=0, exp_type=args.exp_type)
                test_opts.nShot = n_shot
                test_params = test.generate_default_params(test_opts)
                inp_shape = (param["inp_dims"], 1)
                _, acc = test.run(test_opts, signature_dict, test_dict, params=test_params, emb_size=param['embsz'], inp_shape=inp_shape, test_times=1)
                acc_list.append(acc)
                print('acc of source {} and target {} with n_shot {} is: {:f}'.format(source, target, n_shot, acc))
            resLine = 'acc of source {} and target {} with n_shot {} is: {:f}, stdev is: {:f}, time last: {:f}\n\n'.format(source, target, n_shot, mean(acc_list), stdev(acc_list), mean(time_last_list))
            print(resLine, file=f, flush=True)
        f.close()
    elif 'aug' == args.testType:
        print('will run aug test...')
        pass
    elif 'trainNum' == args.testType:
        print('will run train num test...')
        n_shot = 20
        outfile = os.path.join(ResDir, 'trainNumTest_ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        f = open(outfile, 'a+')
        print('\n\n################### test time is: {} ####################'.format(time.ctime()), file=f, flush=True)
        print('test with N shot num: {}'.format(n_shot), file=f, flush=True)
        trainNumList = [25, 50, 75, 100, 125]
        for trainNum in trainNumList:
            acc_list, time_last_list = [], []
            # Load training data according to the train num
            param["source_data"], param["source_label"] = data.data_loader(args.source, param["inp_dims"], sample_num=trainNum)
            print('train data shape is: ', np.array(param['source_data']).shape)
            clsNum = len(set(param["source_label"]))
            param["source_label"] = data.one_hot_encoding(param["source_label"], clsNum)

            for i in range(test_num):
                # Train phase
                signature_dict, test_dict, sites = utility.getDataDict(args.target, n_shot=n_shot, data_dim=param['inp_dims'], train_pool_size=20, test_size=70)
                target_data, target_label = mytools.datadict2data(signature_dict)
                target_data = target_data[:, :, np.newaxis]
                target_label = data.one_hot_encoding(target_label, len(set(target_label)))
                param["target_data"], param["target_label"] = target_data, target_label
                model_path, time_last = train(param, args)
                time_last_list.append(time_last)

                # Test phase
                test_opts = test.MyOpts(model_path, nShot=n_shot, tuning=True, aug=0, exp_type=args.exp_type)
                test_opts.nShot = n_shot
                test_params = test.generate_default_params(test_opts)
                inp_shape = (param["inp_dims"], 1)
                _, acc = test.run(test_opts, signature_dict, test_dict, params=test_params, emb_size=param['embsz'], inp_shape=inp_shape, test_times=1)
                acc_list.append(acc)
                print('acc of source {} and target {} with trainNum {} (n_shot {}) is: {:f}'.format(source, target, trainNum, n_shot, acc))
            resLine = 'acc of source {} and target {} with trainNum {} (n_shot {}) is: {:f}, stdev is: {:f}, training time last: {:f}'.format(source, target, trainNum, n_shot, mean(acc_list), stdev(acc_list), mean(time_last_list))
            print(resLine, file=f, flush=True)
        f.close()
    elif 'trainTime' == args.testType:
        # Train phase
        n_shot = 20
        signature_dict, test_dict, sites = utility.getDataDict(args.target, n_shot=n_shot, data_dim=param['inp_dims'], train_pool_size=20, test_size=70)
        target_data, target_label = mytools.datadict2data(signature_dict)
        target_data = target_data[:, :, np.newaxis]
        target_label = data.one_hot_encoding(target_label, len(set(target_label)))
        param["target_data"], param["target_label"] = target_data, target_label
        model_path, time_last = train(param, args)
        print('training time last: ', time_last)

    else:
        raise ValueError('unknown testType: {}'.format(args.testType))
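
data.one_hot_encoding is used throughout but its implementation is not part of this snippet. A minimal sketch, assuming it maps arbitrary label values onto contiguous class indices:

import numpy as np

def one_hot_encoding(labels, num_classes):
    labels = np.asarray(labels)
    # Map arbitrary label values onto contiguous indices 0..num_classes-1.
    _, idx = np.unique(labels, return_inverse=True)
    encoded = np.zeros((labels.shape[0], num_classes), dtype=np.float32)
    encoded[np.arange(labels.shape[0]), idx] = 1.0
    return encoded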
示例#28
0
import base64

import fastapi
import requests
from starlette.testclient import TestClient

from data import data_loader, data_config

app = fastapi.FastAPI()
get_url = 'http://cyn.apustech.cn:8000/get_model'
post_url = 'http://cyn.apustech.cn:8000/send_data'
client = TestClient(app)
# get_url = 'http://localhost:8000/get_model'
# post_url = 'http://localhost:8000/send_data'

get_param = {
    "index": 10,
}
source_loader = data_loader(data_config.source_speed, data_config.data_type, 'source', is_fft=True)
data, label = next(iter(source_loader))
data = data[:2]
t = data.numpy()
print(t.dtype, t)
s: bytes = base64.b64encode(t)
post_param = {
    "signal": s.decode("utf-8")
}

# Send requests to the backend here
def get(url, param):
    data = requests.get(url, param)
    print(data)
    y = data.json()
    print(y)
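
Only the client side of the /send_data call is shown above; the server is assumed to decode the base64 payload back into a numpy array. A minimal sketch of such an endpoint (the "signal" field name comes from post_param; the dtype and response shape are assumptions):

import base64

import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class SignalPayload(BaseModel):
    signal: str  # base64-encoded raw bytes of a numpy array

@app.post("/send_data")
def send_data(payload: SignalPayload):
    raw = base64.b64decode(payload.signal)
    # The dtype must match what the client encoded (see the t.dtype print above).
    signal = np.frombuffer(raw, dtype=np.float32)
    return {"num_samples": int(signal.size)}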
示例#29
0
def main():
    # Record the best epoch and accuracy
    best_result = {'epoch': 1, 'accuracy': 0.}

    args = parse_args()
    # Use the model name as the visdom environment name
    args.env = args.model
    vis = Visualize(env=args.env) if not args.close_visdom else None

    # Create file to storage result and checkpoint
    if args.root_path != '':
        args.result_path = os.path.join(args.root_path, args.result_path)
        args.checkpoint_path = os.path.join(args.root_path,
                                            args.checkpoint_path)
        args.pretrained_models_path = os.path.join(args.root_path,
                                                   args.pretrained_models_path)
        if not os.path.exists(args.result_path):
            os.mkdir(args.result_path)
        if not os.path.exists(args.checkpoint_path):
            os.mkdir(args.checkpoint_path)
        if not os.path.exists(args.pretrained_models_path):
            os.mkdir(args.pretrained_models_path)
        if args.resume_path:
            args.resume_path = os.path.join(args.checkpoint_path,
                                            args.resume_path)

    # Set manual seed to reproduce random value
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # Create model
    if not torch.cuda.is_available():
        args.device = torch.device('cpu')
    else:
        args.device = torch.device(args.device)
    model = get_model(args)
    model.to(args.device)
    print(model)

    # Define loss function, optimizer, and a scheduler to adjust the lr
    criterion = nn.CrossEntropyLoss().to(args.device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.step_size,
                                          gamma=args.gamma)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Continue training from checkpoint epoch with checkpoint parameters
    if args.resume_path:
        if os.path.isfile(args.resume_path):
            print("=> loading checkpoint '{}'...".format(args.resume_path))
            checkpoint = torch.load(args.resume_path)
            args.begin_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_path))

    # Load dataset
    train_loader = data_loader(args, train=True)
    val_loader = data_loader(args, val=True)
    test_loader = data_loader(args, test=True)
    # Begin to train
    since = time.time()
    for epoch in range(args.begin_epoch, args.epochs + 1):
        adjust_learning_rate(scheduler)
        train_accuracy, train_loss = train_model(args, epoch, model,
                                                 train_loader, criterion,
                                                 optimizer, scheduler, vis)
        # Verify accuracy and loss after training
        val_accuracy, val_loss = val_model(args, epoch, best_result, model,
                                           val_loader, criterion, vis)

        # Plot train and val accuracy and loss each epoch (skip when visdom is disabled)
        if vis is not None:
            accuracy = [[train_accuracy], [val_accuracy]]
            loss = [[train_loss], [val_loss]]
            vis.plot2('accuracy', accuracy, ['train', 'val'])
            vis.plot2('loss', loss, ['train', 'val'])

        # Save checkpoint model each checkpoint interval and keep the last one
        if epoch % args.checkpoint_interval == 0 or epoch == args.epochs:
            save_checkpoint(args, epoch, best_result, model, optimizer,
                            scheduler)
    # Total time to train
    time_elapsed = time.time() - since
    print('Training complete in {}m {}s'.format(time_elapsed // 60,
                                                time_elapsed % 60))

    # Test model with the best val model parameters
    best_model_path = os.path.join(
        args.result_path, '{}_{}.pth'.format(args.model, best_result['epoch']))
    print("Using '{}' for test...".format(best_model_path))
    model.load_state_dict(torch.load(best_model_path))
    test_model(args, model, test_loader)
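
save_checkpoint is called above but not shown; the resume branch reads back epoch, best_result, model and optimizer, so a compatible sketch could look like this (the scheduler field and the file name are assumptions):

import os

import torch

def save_checkpoint(args, epoch, best_result, model, optimizer, scheduler):
    state = {
        'epoch': epoch,
        'best_result': best_result,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),  # assumed; not read back on resume
    }
    # Keep only the latest checkpoint by overwriting a fixed file name (assumed).
    path = os.path.join(args.checkpoint_path, '{}_checkpoint.pth'.format(args.model))
    torch.save(state, path)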
示例#30
0
File: train.py  Project: zihsh/sophie
def main():
    train_path = get_dset_path(DATASET_NAME, 'train')
    val_path = get_dset_path(DATASET_NAME, 'val')
    long_dtype, float_dtype = get_dtypes()

    print("Initializing train dataset")
    train_dset, train_loader = data_loader(train_path)
    print("Initializing val dataset")
    _, val_loader = data_loader(val_path)

    iterations_per_epoch = len(train_dset) / D_STEPS
    NUM_ITERATIONS = int(iterations_per_epoch * NUM_EPOCHS)
    print('There are {} iterations per epoch'.format(iterations_per_epoch))

    generator = TrajectoryGenerator()
    generator.apply(init_weights)
    generator.type(float_dtype).train()
    print('Here is the generator:')
    print(generator)

    discriminator = TrajectoryDiscriminator()
    discriminator.apply(init_weights)
    discriminator.type(float_dtype).train()
    print('Here is the discriminator:')
    print(discriminator)

    optimizer_g = optim.Adam(generator.parameters(), lr=G_LR)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=D_LR)

    t, epoch = 0, 0
    t0 = None
    min_ade = None
    while t < NUM_ITERATIONS:
        gc.collect()
        d_steps_left = D_STEPS
        g_steps_left = G_STEPS
        epoch += 1
        print('Starting epoch {}'.format(epoch))
        for batch in train_loader:

            if d_steps_left > 0:
                losses_d = discriminator_step(batch, generator, discriminator,
                                              gan_d_loss, optimizer_d)
                d_steps_left -= 1
            elif g_steps_left > 0:
                losses_g = generator_step(batch, generator, discriminator,
                                          gan_g_loss, optimizer_g)
                g_steps_left -= 1

            if d_steps_left > 0 or g_steps_left > 0:
                continue

            if t % PRINT_EVERY == 0:
                print('t = {} / {}'.format(t + 1, NUM_ITERATIONS))
                for k, v in sorted(losses_d.items()):
                    print('  [D] {}: {:.3f}'.format(k, v))
                for k, v in sorted(losses_g.items()):
                    print('  [G] {}: {:.3f}'.format(k, v))

                print('Checking stats on val ...')
                metrics_val = check_accuracy(val_loader, generator,
                                             discriminator, gan_d_loss)

                print('Checking stats on train ...')
                metrics_train = check_accuracy(train_loader,
                                               generator,
                                               discriminator,
                                               gan_d_loss,
                                               limit=True)

                for k, v in sorted(metrics_val.items()):
                    print('  [val] {}: {:.3f}'.format(k, v))
                for k, v in sorted(metrics_train.items()):
                    print('  [train] {}: {:.3f}'.format(k, v))

                if min_ade is None or metrics_val['ade'] < min_ade:
                    min_ade = metrics_val['ade']
                    checkpoint = {
                        't': t,
                        'g': generator.state_dict(),
                        'd': discriminator.state_dict(),
                        'g_optim': optimizer_g.state_dict(),
                        'd_optim': optimizer_d.state_dict()
                    }
                    print("Saving checkpoint to model.pt")
                    torch.save(checkpoint, "model.pt")
                    print("Done.")

            t += 1
            d_steps_left = D_STEPS
            g_steps_left = G_STEPS
            if t >= NUM_ITERATIONS:
                break
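
The best checkpoint is written to model.pt above; reloading it for inference could look like this (the import path of TrajectoryGenerator is an assumption):

import torch
from models import TrajectoryGenerator  # assumed module path

checkpoint = torch.load("model.pt", map_location="cpu")
generator = TrajectoryGenerator()
generator.load_state_dict(checkpoint['g'])
generator.eval()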