示例#1
0
def main():
    """Train the bidirectional LSTM on the 'med' data set.

    Loads the training data, builds the model from ``BidirectionalLSTMNet``,
    compiles it with binary cross-entropy and RMSprop, and fits it for up to
    10 epochs with a 20% validation split. The checkpoint callback is
    configured with ``save_best_only=True`` and writes to
    ``./experiments/lstm_test_med.hdf5``; early stopping monitors ``val_loss``
    with a patience of 3.
    """
    learning_rate = 0.001

    X_train, y_train = load('med')
    # Parenthesised single-argument form prints the same text under both
    # Python 2 and Python 3 (the bare print statement is Python 2 only).
    print('Data loaded')

    net = BidirectionalLSTMNet()
    model = net.build_model()

    checkpointer = ModelCheckpoint(filepath='./experiments/lstm_test_med.hdf5', verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
    rmsprop = RMSprop(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=rmsprop)
    # NOTE(review): `nb_epoch` and `lr` look like older Keras keyword names —
    # confirm against the Keras version this project pins before renaming.
    model.fit(X_train, y_train, batch_size=100, class_weight='auto', nb_epoch=10, shuffle=True, validation_split=0.2, callbacks=[checkpointer, earlystopper])
示例#2
0
def load_patient(filepath, patient_index):
    """Load one patient's spectra and give every measurement its nearest label.

    Measurements and raw labels are first restricted to the time window that
    both series cover. Each remaining measurement then receives the raw label
    whose timestamp ``find_nearest`` reports as closest. The mean and standard
    deviation of the resulting time gaps are printed.

    Args:
        filepath: Location handed to ``data_utils.load``.
        patient_index: Patient selector forwarded to the ``data_utils``
            metadata getters.

    Returns:
        Tuple ``(measurements, labels, mz)``: the windowed measurement rows,
        one label per kept measurement, and the ``'mz_vals'`` entry of the
        spectra metadata.
    """
    handle = data_utils.load(filepath)

    # Pull spectra and label metadata for the requested patient.
    spectra = data_utils.get_spectra_metadata(f=handle, patient_index=patient_index)
    raw_labels, raw_label_times = data_utils.get_label_metadata(
        f=handle, patient_index=patient_index)

    measurements = spectra['time_series']
    measurement_times = spectra['measurement_times']
    mz = spectra['mz_vals']

    # Time window covered by both the labels and the measurements.
    window_end = min(max(raw_label_times), max(measurement_times))
    window_start = max(min(raw_label_times), min(measurement_times))

    # Boolean masks selecting the entries that fall inside the window.
    keep_measurement = np.logical_and(measurement_times >= window_start,
                                      measurement_times <= window_end)
    keep_label = np.logical_and(raw_label_times >= window_start,
                                raw_label_times <= window_end)

    measurements = measurements[keep_measurement, :]
    measurement_times = measurement_times[keep_measurement]
    raw_labels = raw_labels[keep_label]
    raw_label_times = raw_label_times[keep_label]

    # Assign to each measurement the label closest to it in time and record
    # how far apart the two timestamps are.
    n_kept = measurement_times.shape[0]
    labels = np.zeros(n_kept)
    gaps = []
    for pos in range(n_kept):
        t_measured = measurement_times[pos]
        t_label, label_pos = find_nearest(raw_label_times, t_measured)
        labels[pos] = raw_labels[label_pos]
        gaps.append(np.abs(t_label - t_measured))

    # Report how well the label times line up with the measurement times.
    gaps = np.array(gaps)
    print("Average temporal discrepancy: ", np.mean(gaps))
    print("STD  of temporal discrepancy: ", np.std(gaps))

    return measurements, labels, mz
示例#3
0
        val_split = val_y[tr_idx], val_y[val_idx]
        tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator(
            train_split, val_split, train, test, BATCH_SIZE)

        h = model.fit_generator(tr_gen,
                                epochs=EPOCH,
                                steps_per_epoch=len(tr_x) // BATCH_SIZE,
                                validation_data=val_gen,
                                validation_steps=len(val_x) // BATCH_SIZE,
                                callbacks=callbacks)
        val_pred = model.predict(dummy_test_gen)
        test_pred = model.predict(test_gen)
        val_score = accuracy_score(np.argmax(val_split[1], 1), val_pred)
        print(f"fold_{i} val accuracy : {val_score}")
        pred.append(test_pred)

    mean_pred = np.mean(pred, axis=0)
    pred_y = np.argmax(mean_pred, 1)
    submission = pd.Series(pred_y, name='label')
    submission.to_csv(os.path.join(PATH, f"submission_fold{N_FOLD}.csv"),
                      header=True,
                      index_label='id')


# Load the data, carve out a single train/validation split and build the
# generators for it.
train, test, y = load()
train_split, val_split = data_split(train, y, N_VAL)
tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator(
    train_split, val_split, train, test, BATCH_SIZE,
)
# Each split is an (x, y) pair; expose its halves under their own names.
(tr_x, tr_y), (val_x, val_y) = train_split, val_split
示例#4
0
from matplotlib import pyplot

from data_utils import load
from data_stats import CDataStatsNoPreprocess, CDataStatsTrimmed, CDataStatsFrame

# Plot the first three digits next to the histogram produced by each
# statistics object, and collect the per-digit means as text in `output`.
x, y = load('data/mnist.csv')

stats = [
    CDataStatsNoPreprocess(),
    CDataStatsTrimmed(30),
    CDataStatsFrame(28, 28, 5),
]
names = ['original', 'trimmed', 'framed']
output = ''

for i in range(3):
    digit = x[i]
    pyplot.figure(figsize=(12, 12))

    # Top-left panel: the digit itself, reshaped to a 28x28 image.
    pyplot.subplot(2, 2, 1)
    pyplot.imshow(digit.reshape(28, 28))
    pyplot.title('The ' + str(i + 1) + '. digit')

    # Remaining panels: one histogram per statistics object.
    for j, stat in enumerate(stats):
        pyplot.subplot(2, 2, j + 2)
        stat.histogram(digit)
        pyplot.title('Histogram, ' + names[j])
        output += ''.join([
            'The mean of the ', str(i + 1), '. digit, ', names[j], ': ',
            str(stat.mean(digit)), '\n',
        ])

    pyplot.savefig('digit' + str(i + 1) + '.pdf', dpi=1000)
示例#5
0
文件: main.py 项目: senze/rstudy
                    type=int,
                    default=4,
                    help="sample negative items for training")
parser.add_argument("--test_num_ng",
                    type=int,
                    default=99,
                    help="sample part of negative items for testing")
# Without a `type`, argparse stores a command-line value as a plain string, so
# `--out False` used to yield the truthy string "False". The converter maps
# the usual "off" spellings to False and keeps every other value truthy. The
# non-string default is left untouched by argparse.
parser.add_argument("--out",
                    type=lambda text: text.strip().lower() not in
                    ("", "0", "false", "no", "n", "off"),
                    default=True,
                    help="save model or not")
parser.add_argument("--gpu", default="0", help="gpu card ID")
args = parser.parse_args()

# Expose the GPU id given by --gpu through CUDA_VISIBLE_DEVICES.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
cudnn.benchmark = True

# PREPARE DATASET
train_data, test_data, user_num, item_num, train_mat = data_utils.load()

# construct the train and test datasets
train_dataset = data_utils.NCFData(train_data, item_num, train_mat,
                                   args.num_ng, True)
test_dataset = data_utils.NCFData(test_data, item_num, train_mat, 0, False)
train_loader = data.DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=4)
# Test batches hold test_num_ng + 1 rows and are not shuffled — presumably one
# positive item followed by its sampled negatives; confirm against NCFData.
test_loader = data.DataLoader(test_dataset,
                              batch_size=args.test_num_ng + 1,
                              shuffle=False,
                              num_workers=0)

# CREATE MODEL
示例#6
0
    deconvolved = rl_deconv_all(image_masked_array_bilinear,
                                psf_array_linear,
                                iterations=20,
                                lbd=0.1)

    # Figures
    io.imsave('original.png', scale(downscaled))
    io.imsave('deconvolved.png', scale(deconvolved))
    plt.figure()
    fig, ax = plt.subplots()
    ax1 = plt.subplot(2, 2, 1)
    ax2 = plt.subplot(2, 2, 3)
    ax3 = plt.subplot(2, 2, 2)
    ax4 = plt.subplot(2, 2, 4)

    im1 = ax1.imshow(downscaled)
    ax1.set_title('Original')
    im2 = ax2.imshow(output_filtered_scaled[0][:, :], vmin=1, vmax=6)
    ax2.set_title('Detected FWMH X (px)')
    im3 = ax3.imshow(deconvolved)
    ax3.set_title('Deconvolved RL TV')
    im4 = ax4.imshow(output_filtered_scaled[1][:, :], vmin=1, vmax=6)
    ax4.set_title('Detected FWMH Y (px)')
    plt.show()


if __name__ == "__main__":
    # Script entry point: load the saved model, read the sample image and run
    # the deconvolution demo on it.
    # NOTE(review): `model` is not passed to deconvolution_demo; presumably it
    # is read there as a module-level global — confirm before renaming.
    model = load('models/model_26.pt')
    image = io.imread('data/fly.png')
    deconvolution_demo(image)
示例#7
0
import elastic_utils
import elastic_settings
import data_utils


if __name__ == '__main__':

    result_count = 5
    result_explain = True

    # Build the index with the stock English analyzer settings.
    default_settings = elastic_settings.english_analyzer()
    elastic_utils.reindex(default_settings, data_utils.load())

    # Both searches use the same query text and match type; only the heading
    # and the field list (second one down-weights the director field) differ.
    # Other result printers available in elastic_utils for debugging:
    # print_explain_from_results, print_results, print_explanation.
    searches = [
        ("Search title, overview, cast, director:",
         ['title', 'overview', 'cast.name', 'directors.name']),
        ("\nDown-boost director:",
         ['title', 'overview', 'cast.name', 'directors.name^0.1']),
    ]
    for heading, fields in searches:
        print(heading)
        query = elastic_utils.build_multi_match_query(
            'patrick stewart', fields, 'best_fields',
            result_count, result_explain)
        results = elastic_utils.execute_search(query)
        elastic_utils.print_result_table(results, result_count)

    print("\nReindexing with bigrams")
示例#8
0
def reindex():
    """Rebuild the index from freshly loaded data using the English analyzer settings."""
    analyzer_settings = elastic_settings.english_analyzer()
    documents = data_utils.load()
    elastic_utils.reindex(analyzer_settings, documents)
示例#9
0
    compute_grid(output_filtered_scaled[0],downscaled)
    grid_z1 = load_grid(psf_array_linear.shape[0])
    for i, current_psf in enumerate(psf_array_linear):
        log.info('Detected PSF {} with focus x {} y {}'.format(i, flattened_map[0][i],flattened_map[1][i]))
        image_masked_array_bilinear.append(np.multiply(grid_z1[i], downscaled))

    deconvolved = rl_deconv_all(image_masked_array_bilinear, psf_array_linear, iterations=20, lbd=0.1)

    # Figures
    io.imsave('original.png', scale(downscaled))
    io.imsave('deconvolved.png', scale(deconvolved))
    fig, ax = plt.subplots()
    ax1 = plt.subplot(2, 2, 1)
    ax2 = plt.subplot(2, 2, 3)
    ax3 = plt.subplot(2, 2, 2)
    ax4 = plt.subplot(2, 2, 4)

    im1 = ax1.imshow(downscaled)
    ax1.set_title('Original')
    im2 = ax2.imshow(output_filtered_scaled[0][:,:], vmin=0.5, vmax=4)
    ax2.set_title('Detected FWMH X (px)')
    im3 = ax3.imshow(deconvolved)
    ax3.set_title('Deconvolved RL TV')
    im4 = ax4.imshow(output_filtered_scaled[1][:,:], vmin=0.5, vmax=4)
    ax4.set_title('Detected FWMH Y (px)')
    plt.show()

if __name__ == "__main__":
    # Script entry point: load the saved model, read the sample image and run
    # the deconvolution demo on it.
    # NOTE(review): `model` is not passed to deconvolution_demo; presumably it
    # is read there as a module-level global — confirm before renaming.
    model = load('models/model_999.pt')
    image = io.imread('data/fly.png')
    deconvolution_demo(image)
示例#10
0
def main(args):
    """Train a RealNVP flow and keep the model with the best validation log-likelihood.

    Every epoch runs one pass over the training split, one pass over the
    validation split, and writes a grid of generated samples under
    ``./samples/<dataset>/``. Whenever the mean validation log-likelihood
    improves, the whole model is saved under ``./models/<dataset>/``.
    Training stops after ``args.max_epoch`` epochs or after 100 consecutive
    epochs without improvement.

    Args:
        args: Parsed options. Reads ``dataset``, ``batch_size``, ``base_dim``,
            ``res_blocks``, ``bottleneck``, ``skip``, ``weight_norm``,
            ``coupling_bn``, ``affine``, ``lr``, ``momentum``, ``decay``,
            ``max_epoch`` and ``sample_size``.
    """
    import os  # local import: only needed to create the output directories

    # Use the first GPU when there is one; otherwise fall back to the CPU so
    # the script still runs on machines without CUDA.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # model hyperparameters
    dataset = args.dataset
    batch_size = args.batch_size
    hps = Hyperparameters(base_dim=args.base_dim,
                          res_blocks=args.res_blocks,
                          bottleneck=args.bottleneck,
                          skip=args.skip,
                          weight_norm=args.weight_norm,
                          coupling_bn=args.coupling_bn,
                          affine=args.affine)
    scale_reg = 5e-5  # L2 regularization strength

    # optimization hyperparameters
    lr = args.lr
    momentum = args.momentum
    decay = args.decay

    # prefix for images and checkpoints (parenthesised instead of relying on
    # backslash continuations, which break if a blank line or comment moves)
    filename = ('bs%d_' % batch_size
                + 'normal_'
                + 'bd%d_' % hps.base_dim
                + 'rb%d_' % hps.res_blocks
                + 'bn%d_' % hps.bottleneck
                + 'sk%d_' % hps.skip
                + 'wn%d_' % hps.weight_norm
                + 'cb%d_' % hps.coupling_bn
                + 'af%d' % hps.affine)

    # Make sure the output directories exist before the first save.
    sample_dir = './samples/' + dataset
    model_dir = './models/' + dataset
    os.makedirs(sample_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # load dataset
    train_split, val_split, data_info = data_utils.load(dataset)
    train_loader = torch.utils.data.DataLoader(train_split,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
    val_loader = torch.utils.data.DataLoader(val_split,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=2)

    prior = distributions.Normal(  # isotropic standard normal distribution
        torch.tensor(0.).to(device),
        torch.tensor(1.).to(device))
    flow = realnvp.RealNVP(datainfo=data_info, prior=prior, hps=hps).to(device)
    optimizer = optim.Adamax(flow.parameters(),
                             lr=lr,
                             betas=(momentum, decay),
                             eps=1e-7)

    epoch = 0
    running_loss = 0.
    running_log_ll = 0.
    optimal_log_ll = float('-inf')
    early_stop = 0

    image_size = data_info.channel * data_info.size**2  # full image dimension

    def bits_per_dim(log_ll_value):
        """Convert a mean log-likelihood into bits per image dimension."""
        return (-log_ll_value + np.log(256.) * image_size) \
            / (image_size * np.log(2.))

    while epoch < args.max_epoch:
        epoch += 1
        print('Epoch %d:' % epoch)
        flow.train()
        for batch_idx, data in enumerate(train_loader, 1):
            optimizer.zero_grad()
            x, _ = data
            # log-determinant of Jacobian from the logit transform
            x, log_det = data_utils.logit_transform(x)
            x = x.to(device)
            log_det = log_det.to(device)

            # log-likelihood of input minibatch
            log_ll, weight_scale = flow(x)
            log_ll = (log_ll + log_det).mean()

            # add L2 regularization on scaling factors
            loss = -log_ll + scale_reg * weight_scale
            running_loss += loss.item()
            running_log_ll += log_ll.item()

            loss.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                bit_per_dim = bits_per_dim(log_ll.item())
                print('[%d/%d]\tloss: %.3f\tlog-ll: %.3f\tbits/dim: %.3f' % \
                    (batch_idx*batch_size, len(train_loader.dataset),
                        loss.item(), log_ll.item(), bit_per_dim))

        # batch_idx started at 1, so after the loop it equals the batch count.
        mean_loss = running_loss / batch_idx
        mean_log_ll = running_log_ll / batch_idx
        mean_bit_per_dim = bits_per_dim(mean_log_ll)
        print('===> Average train loss: %.3f' % mean_loss)
        print('===> Average train log-likelihood: %.3f' % mean_log_ll)
        print('===> Average train bit_per_dim: %.3f' % mean_bit_per_dim)
        running_loss = 0.
        running_log_ll = 0.

        flow.eval()
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader, 1):
                x, _ = data
                x, log_det = data_utils.logit_transform(x)
                x = x.to(device)
                log_det = log_det.to(device)

                # log-likelihood of input minibatch
                log_ll, weight_scale = flow(x)
                log_ll = (log_ll + log_det).mean()

                # add L2 regularization on scaling factors
                loss = -log_ll + scale_reg * weight_scale
                running_loss += loss.item()
                running_log_ll += log_ll.item()

            mean_loss = running_loss / batch_idx
            mean_log_ll = running_log_ll / batch_idx
            mean_bit_per_dim = bits_per_dim(mean_log_ll)
            print('===> Average validation loss: %.3f' % mean_loss)
            print('===> Average validation log-likelihood: %.3f' % mean_log_ll)
            print('===> Average validation bits/dim: %.3f' % mean_bit_per_dim)
            running_loss = 0.
            running_log_ll = 0.

            # Draw samples, undo the logit transform and save them as a grid.
            samples = flow.sample(args.sample_size)
            samples, _ = data_utils.logit_transform(samples, reverse=True)
            utils.save_image(
                utils.make_grid(samples),
                sample_dir + '/' + filename + '_ep%d.png' % epoch)

        # mean_log_ll now holds the validation value; it drives both the
        # checkpointing and the early-stopping counter.
        if mean_log_ll > optimal_log_ll:
            early_stop = 0
            optimal_log_ll = mean_log_ll
            torch.save(flow, model_dir + '/' + filename + '.model')
            print('[MODEL SAVED]')
        else:
            early_stop += 1
            if early_stop >= 100:
                break

        print('--> Early stopping %d/100 (BEST validation log-likelihood: %.3f)' \
            % (early_stop, optimal_log_ll))

    print('Training finished at epoch %d.' % epoch)
示例#11
0
# Hyperparameters and run flags for this script. Only `batch_size` is consumed
# in the lines visible here (it fixes the leading dimension of the
# placeholders below); the roles of the others are inferred from their names —
# TODO confirm against the code that uses them.
batch_size = 100
num_train = 49000
num_val = 1000
num_test = 1000
dropout = 0.5
display_step = 10
reg = 0.1
drop = False
learning_rate = 0.01
first_layer = 20
second_layer = 50
verbose = True
test = True
fsize = 5

# `path` must already be defined before this line; `load` returns four values,
# presumably train/test inputs and labels — verify against `load`.
Xtr, Ytr, Xte, Yte = load(path)

# Graph inputs with a fixed batch dimension: a [batch_size, 32, 32, 3] float
# tensor named 'images' and a (batch_size, 20) float tensor named 'labels'.
# The 20 matches the default `num_classes` of `one_hot` below.
i_placeholder = tf.placeholder(tf.float32, shape=[batch_size,32,32,3], name='images')
l_placeholder = tf.placeholder(tf.float32, shape=(batch_size,20,),name='labels')
# Scalar-less float placeholder; presumably the dropout keep probability —
# TODO confirm where it is fed.
drop_placeholder = tf.placeholder(tf.float32)


def conv2d(x, W):
    """Apply ``tf.nn.conv2d`` to ``x`` with filter ``W``, stride 1 on every axis and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_nxn(x, n):
    """Max-pool ``x`` with an ``n`` x ``n`` window and a matching stride of ``n``, using 'SAME' padding."""
    window = [1, n, n, 1]
    # Window and stride have the same values; pass separate lists as the
    # original call did.
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')

def one_hot(labels, num_classes=20):
    num_labels = labels.shape[0]
    index_offset = np.arange(num_labels) * num_classes
示例#12
0
    def train(self, check_point=None):
        """Train the critic (netD) and generator (netG) with a Wasserstein loss plus gradient penalty.

        For each of ``config.training.max_iter`` generator iterations the
        critic is updated ``config.training.critic_iter`` times, then the
        generator once. Scalars are written to TensorBoard every
        ``minitor_iter`` iterations; every ``save_iter`` iterations a sample
        grid, image logs and a checkpoint are written.

        Args:
            check_point: Optional path to a checkpoint written by this method
                (a list ``[netD, netG, optD, optG]`` of state dicts). If the
                file does not exist, a message is printed and training starts
                from freshly initialised networks.
        """
        self.device = 'cuda:0' if self.config.training.use_gpu else 'cpu'

        train_data, _ = data_utils.load(self.config.data.dataset)
        image_loader = data_utils.InfiniteLoader(
            dset.DataLoader(dataset=train_data,
                            batch_size=self.config.training.batch_size,
                            drop_last=True,
                            shuffle=True,
                            num_workers=2))

        # Networks and optimizers are always built fresh; a checkpoint, when
        # available, is then loaded on top of them.
        netD = Discriminator(self.config.data.channels).to(self.device)
        netG = Generator(self.config.data.channels).to(self.device)
        optD = self.set_optimizer(netD.parameters())
        optG = self.set_optimizer(netG.parameters())

        if check_point is not None:
            try:
                # map_location lets a checkpoint saved on GPU load on CPU.
                states = torch.load(check_point, map_location=self.device)
            except FileNotFoundError:
                # Previously execution continued and crashed on the unbound
                # `states`; now training simply starts from scratch.
                print("Check point is not Found...\n")
            else:
                netD.load_state_dict(states[0])
                netG.load_state_dict(states[1])
                optD.load_state_dict(states[2])
                optG.load_state_dict(states[3])

        writer = SummaryWriter(log_dir=self.config.training.log_dir)
        # Gradient seeds for backward(): +1 minimises a term, -1 maximises it.
        one = torch.tensor(1., dtype=torch.float).to(self.device)
        mone = (one * -1).to(self.device)
        tbar = tqdm(range(self.config.training.max_iter))

        for g_iter in tbar:

            for p in netD.parameters():
                p.requires_grad = True

            d_loss_real = 0.
            d_loss_fake = 0.
            Wasserstein_D = 0.
            # netD optimization
            for d_iter in range(self.config.training.critic_iter):
                netD.zero_grad()

                real_images, _ = next(image_loader)
                real_images = real_images.to(self.device)
                z = torch.randn([
                    self.config.training.batch_size,
                    self.config.model.hidden_dim, 1, 1
                ]).to(self.device)

                # Raise the critic score on real images ...
                d_loss_real = netD(real_images)
                d_loss_real = d_loss_real.mean()
                d_loss_real.backward(mone)

                # ... and lower it on generated ones.
                fake_images = netG(z)
                d_loss_fake = netD(fake_images)
                d_loss_fake = d_loss_fake.mean()
                d_loss_fake.backward(one)

                gradient_penalty = self.calc_gradient_penalty(
                    netD, real_images, fake_images)
                gradient_penalty.backward()

                # Only used for logging; gradients were accumulated above.
                d_loss = d_loss_fake - d_loss_real + gradient_penalty
                Wasserstein_D = d_loss_real - d_loss_fake

                optD.step()

            # netG optimization (critic frozen so only netG receives updates)
            for p in netD.parameters():
                p.requires_grad = False
            netG.zero_grad()

            z = torch.randn([
                self.config.training.batch_size, self.config.model.hidden_dim,
                1, 1
            ]).to(self.device)
            fake_images = netG(z)

            g_loss = netD(fake_images)
            g_loss = g_loss.mean()
            g_loss.backward(mone)

            optG.step()

            if (g_iter + 1) % self.config.training.minitor_iter == 0:
                # Using Tensorboard to log training
                # NOTE(review): 'loss_G_fake' carries the critic's mean score
                # on fake images; the tag is kept unchanged so existing logs
                # stay comparable.
                log = {
                    'wasserstein_distance': Wasserstein_D.item(),
                    'loss_D': d_loss.item(),
                    'loss_G': g_loss.item(),
                    'loss_D_real': d_loss_real.item(),
                    'loss_G_fake': d_loss_fake.item()
                }
                info = "[{}/{}], loss_D: {:.3f}, loss_G: {:.3f}, W_distance: {:.3f}".format(
                    g_iter + 1, self.config.training.max_iter, d_loss.item(),
                    -g_loss.item(), Wasserstein_D.item())
                tbar.set_description(info)

                for key, value in log.items():
                    writer.add_scalar(key, value, g_iter + 1)

            if (g_iter + 1) % self.config.training.save_iter == 0:

                # Rescale images with x * 0.5 + 0.5 before saving/logging.
                real_images = real_images.mul(0.5).add(0.5).cpu()
                z = torch.randn([
                    self.config.training.batch_size,
                    self.config.model.hidden_dim, 1, 1
                ]).to(self.device)
                # Preview samples need no autograd graph.
                with torch.no_grad():
                    samples = netG(z)
                samples = samples.mul(0.5).add(0.5).cpu()
                grid = utils.make_grid(samples)
                utils.save_image(
                    grid,
                    os.path.join(self.args.image_path,
                                 self.config.data.dataset,
                                 "iters_%d.png" % (g_iter + 1)))

                image_log = {
                    'real_image': real_images,
                    'generated_image': samples
                }
                for key, value in image_log.items():
                    writer.add_images(key, value, g_iter + 1)

                # Checkpoint layout: [netD, netG, optD, optG] state dicts,
                # written once under an iteration-stamped name and once as
                # the rolling 'checkpoint.pth'.
                states = [
                    netD.state_dict(),
                    netG.state_dict(),
                    optD.state_dict(),
                    optG.state_dict()
                ]
                torch.save(
                    states,
                    os.path.join(self.config.training.check_point,
                                 'checkpoint_{}.pth'.format(g_iter + 1)))
                torch.save(
                    states,
                    os.path.join(self.config.training.check_point,
                                 'checkpoint.pth'))

        # Flush pending events and release the log file handle.
        writer.close()
示例#13
0
def reindex(name, settings):
    """Rebuild the index from freshly loaded data using the given settings.

    Args:
        name: Label for the settings, used only in the progress message.
        settings: Index settings handed to ``elastic_utils.reindex``. When
            ``None``, ``elastic_settings.english_bigrams()`` is used, which is
            what this function previously applied unconditionally.
    """
    print("\nReindexing with settings: {}".format(name))

    # The parameter used to be ignored, so the printed name could disagree
    # with the settings actually applied.
    if settings is None:
        settings = elastic_settings.english_bigrams()
    elastic_utils.reindex(settings, data_utils.load())