Example #1
import numpy as np

def gen(l, m, n, number_of_instances, noise):
    assert l <= m <= n  # precondition

    # balanced dataset
    p_number_of_instances = int(number_of_instances / 2)
    n_number_of_instances = number_of_instances - p_number_of_instances

    # positive examples
    p_y = np.ones((p_number_of_instances, 1), dtype=int)
    p_x_first_part = np.zeros((p_number_of_instances, m), dtype=int)
    p_x_second_part = (np.random.random((p_number_of_instances, n-m)) < 0.5)
    p_x_second_part = p_x_second_part.astype(int)

    # columnwise append
    p_x = np.append(p_x_first_part, p_x_second_part, axis=1)

    for i in range(p_number_of_instances):
        candidates = np.random.permutation(m)
        n_nonzeros = l
        active_features = candidates[:n_nonzeros]
        p_x[i, active_features] = 1  # set the chosen features to 1

    # negative examples
    n_y = -1 * np.ones((n_number_of_instances, 1), dtype=int)
    n_x_first_part = np.zeros((n_number_of_instances, m), dtype=int)
    n_x_second_part = (np.random.random((n_number_of_instances, n-m)) < 0.5)
    n_x_second_part = n_x_second_part.astype(int)

    # columnwise append
    n_x = np.append(n_x_first_part, n_x_second_part, axis=1)

    for i in range(n_number_of_instances):
        candidates = np.random.permutation(m)
        n_nonzeros = l - 2
        active_features = candidates[:n_nonzeros]
        n_x[i, active_features] = 1  # set the chosen features to 1

    y = np.append(p_y, n_y)
    x = np.append(p_x, n_x, axis=0)

    shuffle_indices = np.random.permutation(number_of_instances)
    y = y[shuffle_indices]
    x = x[shuffle_indices]

    # sanity check
    validate_dataset(y, x, l, m, n, number_of_instances)

    if noise:
        noise_y_rate = 0.05
        noise_x_rate = 0.001
        return add_noise(y, x, noise_y_rate, noise_x_rate)

    return (y, x)
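The add_noise helper called above is defined elsewhere; a minimal sketch consistent with this call signature (flipping a random fraction of labels and of binary feature bits) might look like the following. The flip logic is an assumption, not the original implementation.

import numpy as np

def add_noise(y, x, noise_y_rate, noise_x_rate):
    # Hypothetical sketch: flip a random fraction of labels and feature bits.
    y = y.copy()
    x = x.copy()
    flip_y = np.random.random(y.shape) < noise_y_rate  # roughly 5% of labels
    y[flip_y] *= -1
    flip_x = np.random.random(x.shape) < noise_x_rate  # roughly 0.1% of bits
    x[flip_x] = 1 - x[flip_x]
    return (y, x)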
Example #2
    max_features = params['max_features']
    max_depth = params['max_depth']
    loss = params['loss']
    learning_rate = params['learning_rate']

    missing_phenos = y[y.isnull()].index.values
    y = y.drop(missing_phenos, axis=0)
    geno_c = geno.copy()
    geno_c = geno_c.drop(missing_phenos, axis=0)
    r2s = []
    y = y.to_numpy()
    for (j, noise_ratio) in enumerate(noise_ratios):
        r2s_n = []
        for k in np.arange(M):

            y_n = add_noise(y, noise_ratio)

            X_train, X_test, y_train, y_test = train_test_split(geno_c,
                                                                y_n,
                                                                test_size=0.3)
            X_train = X_train.drop(columns=["Unnamed: 0"]).values
            X_test = X_test.drop(columns=["Unnamed: 0"]).values

            # Standardize properly: use the training-set mean/std for both splits
            y_train_std = (y_train - np.mean(y_train)) / np.std(y_train)
            y_test_std = (y_test - np.mean(y_train)) / np.std(y_train)

            rf = GradientBoostingRegressor(n_estimators=n_estimators,
                                           min_samples_split=min_samples_split,
                                           min_samples_leaf=min_samples_leaf,
                                           max_features=max_features,
                                           max_depth=max_depth,
                                           loss=loss,
                                           learning_rate=learning_rate,
                                           subsample=1)
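Here add_noise(y, noise_ratio) perturbs the phenotype vector before each train/test split; a plausible sketch, assuming it adds zero-mean Gaussian noise scaled to a fraction of the phenotype's standard deviation:

import numpy as np

def add_noise(y, noise_ratio):
    # Hypothetical sketch: Gaussian noise with std = noise_ratio * std(y).
    sigma = noise_ratio * np.std(y)
    return y + np.random.normal(0.0, sigma, size=y.shape)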
Example #3
import numpy as np
import matplotlib.image as mpimg
from add_noise import add_noise
from simulated_annealing import simulated_anealing

################################
#    read image and add noise  #
################################
# read noise free image
img_noise_free = mpimg.imread('test1.bmp')
# convert it to gray level (a 2-D matrix instead of a 3-D matrix)
# convert to range [0,1]
img_noise_free_gl = np.double(np.average(img_noise_free, weights=[0.299, 0.587, 0.114], axis=2))/255

# add gaussian noise to image
variance = 0.05
img_gaussian_noise = add_noise(img=img_noise_free_gl, mean=0, variance=variance)
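The imported add_noise is not reproduced on this page; a minimal sketch consistent with the keyword arguments used above (additive Gaussian noise on a [0, 1] gray-level image, clipped back into range) could be:

def add_noise(img, mean, variance):
    # Hypothetical sketch: additive Gaussian noise, clipped to [0, 1].
    noise = np.random.normal(mean, np.sqrt(variance), img.shape)
    return np.clip(img + noise, 0.0, 1.0)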

############################################
#    Find Parameters For singleton clique  #
############################################
# prior of each class; equal priors are assumed
priors = {0: 1/3, 127: 1/3, 255: 1/3}

# three regions in the noisy image that belong to different classes.
# We use these three regions to estimate the mean and standard deviation
# of each class.
regions = {0: (15, 15), 127: (120, 15), 255: (171, 149)}
len_of_region = 50

# calculate P(feature|class i); each is a Gaussian distribution
conditional_p = {0: None, 127: None, 255: None}
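How these class-conditional Gaussians are filled in is not shown; one way to estimate them from the three labeled regions, assuming each tuple in regions is the (row, col) corner of a square patch of side len_of_region, is:

from scipy.stats import norm

for label, (row, col) in regions.items():
    patch = img_gaussian_noise[row:row + len_of_region, col:col + len_of_region]
    # Fit a Gaussian to the pixel values of this class's patch.
    conditional_p[label] = norm(loc=np.mean(patch), scale=np.std(patch))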
Example #4
import os
import sys
import random
import datetime
from copy import deepcopy

import numpy as np
import librosa

def save_distributed_files(truth_type=None, corpus=None):
    """ 
  Save input and processed files for tests
  
  @param truth_type None for both, 'raw' for non processed and 'eq' for equalized
  """
    print("Started saving chunks of data")
    corpus_len = len(corpus)
    max_val = 0
    max_stft_len = 0
    # Find maximum length of time series data to pad
    for i in range(corpus_len):
        if len(corpus[i].data) > max_val:
            max_val = len(corpus[i].data)

    is_eq = False
    is_raw = False
    is_both = True

    if truth_type == 'eq':
        is_eq = True
        is_both = False
    elif truth_type == 'raw':
        is_raw = True
        is_both = False

    X = []
    y_eq = None
    y_raw = None

    if is_eq or is_both:
        y_eq = []
    if is_raw or is_both:
        y_raw = []

    memory_counter = 0
    pad_length = 0
    total_time = 0
    # Get each sentence from the corpus, add random noise/echoes/both to the
    # input, and preprocess the output. Also pad the signals to max_val
    for i in range(corpus_len):
        start = datetime.datetime.now()

        pad_length = max_val + NFFT // 2
        if pad_length % STACKED_FRAMES != 0:
            pad_length += (STACKED_FRAMES - (pad_length % STACKED_FRAMES))

        # Original data in time domain
        data_orig_td = corpus[i].data.astype(np.float64)
        yi = pad(deepcopy(data_orig_td), pad_length)
        # Sampling frequency
        fs = corpus[i].fs

        # Pad transformed signals
        echosample = add_echoes(data_orig_td)
        noisesample = add_noise(data_orig_td, path(NOISE_DIR,
                                                   "RainNoise.flac"))
        orig_sample = data_orig_td

        echosample = pad(echosample, pad_length)
        noisesample = pad(noisesample, pad_length)
        orig_sample = pad(orig_sample, pad_length)

        # Equalize data for high frequency hearing loss
        data_eq = None
        if is_eq or is_both:
            data_eq, _ = process_sentence(yi, fs=fs)
            yi_stft_eq = librosa.core.stft(data_eq,
                                           n_fft=NFFT,
                                           hop_length=HOP_LENGTH,
                                           center=True)
            yi_stft_eq = librosa.util.normalize(yi_stft_eq, axis=0)
            y_eq.append(np.abs(yi_stft_eq).T)

        # Use non processed input and pad as well
        data_raw = None
        if is_raw or is_both:
            data_raw = deepcopy(yi)
            yi_stft_raw = librosa.core.stft(data_raw,
                                            n_fft=NFFT,
                                            hop_length=HOP_LENGTH,
                                            center=True)
            yi_stft_raw = librosa.util.normalize(yi_stft_raw, axis=0)
            y_raw.append(np.abs(yi_stft_raw).T)

        # Randomise which sample is used as the input
        rand = random.randint(0, 1)
        random_sample_stft = None
        if rand == 0:
            random_sample_stft = librosa.core.stft(noisesample,
                                                   n_fft=NFFT,
                                                   hop_length=HOP_LENGTH,
                                                   center=True)
        else:
            random_sample_stft = librosa.core.stft(orig_sample,
                                                   n_fft=NFFT,
                                                   hop_length=HOP_LENGTH,
                                                   center=True)

        max_stft_len = random_sample_stft.shape[1]
        random_sample_stft = librosa.util.normalize(random_sample_stft, axis=0)
        X.append(np.abs(random_sample_stft).T)

        # print("Padded {}".format(i))
        dt = datetime.datetime.now() - start
        total_time += dt.total_seconds() * 1000
        avg_time = total_time / (i + 1)
        if i % CHUNK == CHUNK - 1:
            print("Time taken for {} samples: {:.0f}ms".format(i + 1, total_time))
            print("Saving temp npy file to CHUNK {}".format(memory_counter))
            # Convert to np arrays
            size = 0
            if is_eq or is_both:
                y_eq_temp = np.array(y_eq)
                size += sys.getsizeof(y_eq_temp)

            if is_raw or is_both:
                y_raw_temp = np.array(y_raw)
                size += sys.getsizeof(y_raw_temp)

            X_temp = np.array(X)
            size += sys.getsizeof(X_temp)

            print("Memory used: {}".format(size / (1024 * 1024)))

            # Save files
            np.save(os.path.join(PP_DATA_DIR, "model",
                                 "inputs_{}.npy".format(memory_counter)),
                    X_temp,
                    allow_pickle=True)

            if is_eq or is_both:
                np.save(
                    os.path.join(PP_DATA_DIR, "model",
                                 "truths_eq_{}.npy".format(memory_counter)),
                    y_eq_temp,
                    allow_pickle=True)
            if is_raw or is_both:
                np.save(os.path.join(
                    PP_DATA_DIR, "model",
                    "truths_raw_{}.npy".format(memory_counter)),
                        y_raw_temp,
                        allow_pickle=True)

            X = []
            y_eq = None
            y_raw = None

            if is_eq or is_both:
                y_eq = []
            if is_raw or is_both:
                y_raw = []

            memory_counter += 1

    if corpus_len % CHUNK > 0:
        # Convert to np arrays
        if is_eq or is_both:
            y_eq_temp = np.array(y_eq)

        if is_raw or is_both:
            y_raw_temp = np.array(y_raw)

        X_temp = np.array(X)
        end_len = len(X)

        # Save temp files
        np.save(os.path.join(PP_DATA_DIR, "model",
                             "inputs_{}.npy".format(memory_counter)),
                X_temp,
                allow_pickle=True)

        if is_eq or is_both:
            np.save(os.path.join(PP_DATA_DIR, "model",
                                 "truths_eq_{}.npy".format(memory_counter)),
                    y_eq_temp,
                    allow_pickle=True)
        if is_raw or is_both:
            np.save(os.path.join(PP_DATA_DIR, "model",
                                 "truths_raw_{}.npy".format(memory_counter)),
                    y_raw_temp,
                    allow_pickle=True)
        print("Saved blocks {}:{}".format(0, memory_counter * CHUNK + end_len))
        memory_counter += 1

    memory_counter = np.array(memory_counter)
    max_stft_len = np.array(max_stft_len)
    corpus_len = np.array(corpus_len)

    np.save(os.path.join(PP_DATA_DIR, "model", "memory_counter"),
            memory_counter)
    np.save(os.path.join(PP_DATA_DIR, "model", "max_stft_len"), max_stft_len)
    np.save(os.path.join(PP_DATA_DIR, "model", "corpus_len"), corpus_len)

    return memory_counter, max_stft_len, truth_type, corpus_len
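The pad helper used throughout is not shown; a minimal zero-padding sketch consistent with how it is called (right-pad a 1-D signal out to a fixed length) might be:

import numpy as np

def pad(signal, length):
    # Hypothetical sketch: right-pad with zeros (or truncate) to `length` samples.
    if len(signal) >= length:
        return signal[:length]
    return np.pad(signal, (0, length - len(signal)), mode='constant')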
Example #5
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

def diffusion():
    s = 0.4
    m = 0.3
    steps = 500

    ksize = 9
    sigma = 1.5
    gradksize = 9
    gradsigma = 1.5

    errors = []

    L = lab4.get_cameraman()
    L_noise = add_noise(L, 10)
    #L_noise = add_noise(L, 8)
    #L_noise = add_noise(L, 12)

    # ----------------- Multiplicative noise -------------------
    #L_log = np.log(L)
    #L_noise = add_noise(L_log, 40)
    #L_noise = np.exp(L_noise)

    # L = lab4.make_circle(128, 128, 120) * 255
    # gaussian = np.random.normal(0, 10, (L.shape[0], L.shape[1]))
    # L = L + gaussian

    fig1 = plt.figure(1)
    fig1.suptitle("Original Image w/ noise")
    plt.imshow(L_noise)

    fig2 = plt.figure(2)
    fig2.suptitle("Enhanced Image")

    L_init = np.copy(L_noise)

    ax1 = plt.subplot(111)
    im1 = ax1.imshow(L_init)

    def init_func():
        pass

    def update(i):
        nonlocal L_noise

        T = estimate_T(L_noise, gradksize, gradsigma, ksize, sigma)

        D = estimate_D(T, m)

        HL = get_HL(L_noise)

        L_noise = L_noise + 0.5 * s * np.trace(D * HL, axis1=2, axis2=3)

        errors.append(np.sum(np.abs(L_noise - L)))
        # print(np.sum(np.abs(L_noise - L)))

        print("Steps:", i)

        im1.set_data(L_noise)

    ani = FuncAnimation(
        plt.gcf(),
        update,
        frames=range(steps),
        repeat=False,
        init_func=init_func,
    )
    plt.show()

    plt.plot(errors)
    plt.show()
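In this example add_noise(L, 10) presumably adds zero-mean Gaussian noise with a standard deviation of 10 gray levels; a sketch under that assumption:

import numpy as np

def add_noise(img, sigma):
    # Hypothetical sketch: additive zero-mean Gaussian noise with std sigma.
    return img + np.random.normal(0.0, sigma, img.shape)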
Example #6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 31 21:25:07 2019

@author: Kuo
"""

from add_noise import add_noise

add_noise('insilico_size100_2_data', 15)
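This variant takes a dataset name and a noise level rather than an array. A speculative sketch of such a file-based wrapper follows; the file format, column handling, and output naming are all assumptions:

import numpy as np
import pandas as pd

def add_noise(dataset_name, noise_percent):
    # Hypothetical sketch: add Gaussian noise at noise_percent% of each
    # column's std to a tab-separated table and save a noisy copy.
    data = pd.read_csv(dataset_name + '.tsv', sep='\t')
    scale = (noise_percent / 100.0) * data.std().values
    noisy = data + np.random.normal(0.0, scale, size=data.shape)
    noisy.to_csv(dataset_name + '_noisy.tsv', sep='\t', index=False)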
Example #7
import os
import cv2 as cv
from tifffile import imsave
from add_noise import add_noise

src_dir = './small_data'
dst_dir = './aug_data_small'
for directory in os.listdir(src_dir):
    for subdir in os.listdir(os.path.join(src_dir, directory)):
        for file in os.listdir(os.path.join(src_dir, directory, subdir)):
            path = os.path.join(src_dir, directory, subdir, file)
            print(path)
            img = cv.imread(path)
            if img.shape != (150, 150, 3):
                img = cv.resize(img, (150, 150))
            noisy_image = add_noise(img)
            dst_path = os.path.join(dst_dir, directory, subdir, file)
            # Make sure the destination directory exists before saving
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            # Save as .tif, replacing the original file extension
            imsave(dst_path[:-4] + '.tif', noisy_image)
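The add_noise(img) used here takes only the image; a plausible sketch under the assumption that it adds Gaussian noise to a uint8 BGR image and clips back to the valid range:

import numpy as np

def add_noise(img, sigma=25):
    # Hypothetical sketch: additive Gaussian noise on a uint8 image.
    noise = np.random.normal(0.0, sigma, img.shape)
    return np.clip(img.astype(np.float64) + noise, 0, 255).astype(np.uint8)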