def cuml_like(arr1, arr2):
    if arr1.size != arr2.size:
        raise ValueError('arr1 and arr2 must be equal-sized arrays')
    arr1 = MinMaxScaler().fit_transform(arr1.astype(float).reshape(-1, 1))
    arr2 = MinMaxScaler().fit_transform(arr2.astype(float).reshape(-1, 1))
    new = np.zeros_like(arr1)
    for n in range(new.size):
        # running sum of scaled arr1 up to n plus the remaining tail of scaled arr2
        new[n] = arr1[:n + 1].sum() + arr2[n + 1:].sum()
    return new
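
# A minimal usage sketch for cuml_like; the imports below are assumptions,
# since the excerpt above does not show them.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

a = np.array([1, 2, 3, 4])
b = np.array([4, 3, 2, 1])
# element n is the running sum of scaled a[:n+1] plus the tail sum of scaled b[n+1:]
print(cuml_like(a, b))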
Example #3
def make_data(n_samples=1000, n_features=1, n_targets=1, informative_prop=1.0,
              noise=0.0, test_prop=0.1, valid_prop=0.3, method='linear'):
    if method == 'linear':
        params = dict(n_features=n_features,
                      n_informative=int(n_features*informative_prop),
                      noise=noise,
                      n_targets=n_targets,
                      n_samples=n_samples,
                      shuffle=False,
                      bias=0.0)
        X, Y = make_regression(**params)
    elif method == 'boston':
        boston = load_boston()
        X = boston.data
        Y = boston.target
    else:
        # make_friedman3 generates a fixed set of 4 features,
        # so n_features is not passed here
        X, Y = make_friedman3(n_samples=n_samples, noise=noise)

    X = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(X)
    X = X.astype(theano.config.floatX)
    # reshape Y to 2-D first; newer scikit-learn rejects 1-D input to fit_transform
    Y = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(Y.reshape(Y.shape[0], -1))
    Y = Y.astype(theano.config.floatX)
    if len(X.shape) > 1:
        n_features = X.shape[1]
    else:
        X = X.reshape(X.shape[0], -1)
        n_features = 1
    if len(Y.shape) > 1:
        n_targets = Y.shape[1]
    else:
        Y = Y.reshape(Y.shape[0], -1)
        n_targets = 1

    X_train, Y_train, X_valid, Y_valid, X_test, Y_test = \
        train_valid_test_split(X, Y,
                               test_prop=test_prop, valid_prop=valid_prop)
    return dict(
        X_train=theano.shared(X_train),
        Y_train=theano.shared(Y_train),
        X_valid=theano.shared(X_valid),
        Y_valid=theano.shared(Y_valid),
        X_test=theano.shared(X_test),
        Y_test=theano.shared(Y_test),
        num_examples_train=X_train.shape[0],
        num_examples_valid=X_valid.shape[0],
        num_examples_test=X_test.shape[0],
        input_dim=n_features,
        output_dim=n_targets)
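
# A hypothetical call of make_data (sketch only); it assumes theano, the
# sklearn dataset generators, and the train_valid_test_split helper used
# inside the function are all importable in this module.
dataset = make_data(n_samples=500, n_features=10, method='linear')
print(dataset['input_dim'], dataset['output_dim'], dataset['num_examples_train'])
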
def normalize_to(data, to_low, to_high):
    """
    Normalize data

    Parameters
    ----------
    data: list[float]
    to_low: int
        Min range
    to_high: int
        Max range
    
    Returns
    -------
    `numpy.ndarray`
        Scaled data, cast to int32
    """

    # convert to `numpy.ndarray`
    data = np.array(data)

    # scale data
    scaled_data = MinMaxScaler(feature_range=(to_low, to_high)).fit_transform(data.reshape(-1,1)).ravel()

    # convert to int
    return scaled_data.astype(np.int32)
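
# Quick sketch of normalize_to on a small list; the exact integers can shift
# by one because values are truncated by the int32 cast after float scaling.
print(normalize_to([0.5, 1.0, 2.0, 4.0], to_low=0, to_high=100))
# roughly [  0  14  42 100]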
Example #5
def crops_from_trial(X, y, crop_len, stride=0, time_last=True, dummy_idx=0, normalize=True):
    crop_len = int(crop_len)
    x_list, y_list = list(), list()
    if stride > 0:
        num_valid_crops = int((X.shape[0] - crop_len) / stride) + 1
    else:
        num_valid_crops = int(X.shape[0] // crop_len)

    for crop in range(num_valid_crops):
        if stride > 0:
            crop_idx = int(crop * stride)
        else:
            crop_idx = int(crop * crop_len)

        x_crop = X[crop_idx:crop_idx + crop_len, ]
        y_crop = y[crop_idx:crop_idx + crop_len, ]
        if normalize:
            y_crop = MinMaxScaler(feature_range=(-1, 1)).fit_transform(y_crop.reshape(-1, 1)).squeeze()
            x_crop = exponential_running_standardize(x_crop, init_block_size=250, factor_new=0.001, eps=1e-4)

        x_list.append(
            np.expand_dims(x_crop.T if time_last else x_crop, axis=dummy_idx).astype(np.float32)
        )
        y_list.append(y_crop.astype(np.float32))
    return x_list, y_list
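
# Usage sketch for crops_from_trial on random data (assumes numpy as np);
# normalize=False avoids the exponential_running_standardize helper, which
# this excerpt does not define.
X_demo = np.random.randn(1000, 32)   # 1000 time samples, 32 channels
y_demo = np.random.randn(1000)       # continuous target per sample
crops_x, crops_y = crops_from_trial(X_demo, y_demo, crop_len=250, stride=125,
                                    normalize=False)
print(len(crops_x), crops_x[0].shape)  # 7 crops of shape (1, 32, 250)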
def prepare_data(df, n, step, test_size=0.3):
    delta = df.drop('volume', axis=1).pct_change()
    log_volume_delta = np.log(df.volume) - np.log(df.volume.shift(1))
    delta['volume'] = log_volume_delta
    delta = delta.dropna(how='all')
    df = df.iloc[1:, :]
    nrows = delta.shape[0]
    i = 0
    X = []
    y = []
    while True:
        x_start, x_end, y_start, y_end = get_idx(i, n, step)
        if y_end > nrows - 1:
            break
        x = delta.iloc[x_start:x_end, :].values
        x = MinMaxScaler().fit_transform(x) * 255
        X.append(x.astype('int'))
        y.append((df.iloc[y_end, :].close - df.iloc[y_start, :].close) /
                 df.iloc[y_start, :].close)
        i += 1
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size)
    X_train, X_test = np.expand_dims(X_train, -1), np.expand_dims(X_test, -1)
    y_train, y_test = np.array(y_train) >= 0, np.array(y_test) >= 0
    return X_train, X_test, y_train * 1.0, y_test * 1.0
def load_mice(one_hot=False):
    filling_value = -100000

    X = np.genfromtxt('datasets/Data_Cortex_Nuclear.csv',
                      delimiter=',',
                      skip_header=1,
                      usecols=range(1, 78),
                      filling_values=filling_value,
                      encoding='UTF-8')
    classes = np.genfromtxt('datasets/Data_Cortex_Nuclear.csv',
                            delimiter=',',
                            skip_header=1,
                            usecols=range(78, 81),
                            dtype=None,
                            encoding='UTF-8')

    for i, row in enumerate(X):
        for j, val in enumerate(row):
            if val == filling_value:
                X[i, j] = np.mean([
                    X[k, j] for k in range(classes.shape[0])
                    if np.all(classes[i] == classes[k])
                ])

    DY = np.zeros((classes.shape[0]), dtype=np.uint8)
    for i, row in enumerate(classes):
        for j, (val, label) in enumerate(
                zip(row, ['Control', 'Memantine', 'C/S'])):
            DY[i] += (2**j) * (val == label)

    Y = np.zeros((DY.shape[0], np.unique(DY).shape[0]))
    for idx, val in enumerate(DY):
        Y[idx, val] = 1

    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)

    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    X = X[indices]
    Y = Y[indices]
    DY = DY[indices]
    classes = classes[indices]

    if not one_hot:
        Y = DY

    X = X.astype(np.float32)
    Y = Y.astype(np.float32)

    print(X.shape, Y.shape)

    split = X.shape[0] * 4 // 5
    return (X[:split], Y[:split]), (X[split:], Y[split:])
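
# Sketch of load_mice; it assumes datasets/Data_Cortex_Nuclear.csv (the Mice
# Protein Expression dataset) is present relative to the working directory.
(x_tr, y_tr), (x_te, y_te) = load_mice(one_hot=True)
print(x_tr.shape, y_tr.shape, x_te.shape, y_te.shape)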
def build_dataset(X, y, labels, test_prop=0.2, valid_prop=0.2,
                  register='both', test=False):
    if register in ['IDS', 'ADS']:
        sel_ixs = np.in1d(y, np.nonzero(labels[:, 1] == register)[0])
        X = X[sel_ixs]
        y = y[sel_ixs]
    elif register == 'both': # merge IDS and ADS labels per phone
        ix2phone = dict(enumerate(labels[:, 0]))
        phones = sorted(set(ix2phone.values()))
        phone2newix = {p:ix for ix, p in enumerate(phones)}
        y = np.array([phone2newix[ix2phone[i]] for i in y])
    else:
        raise ValueError('invalid option for register: {0}'.format(register))

    oldix2newix = {old_ix:new_ix for new_ix, old_ix in enumerate(np.unique(y))}
    y = np.array([oldix2newix[i] for i in y])

    # X = StandardScaler().fit_transform(X)
    X = MinMaxScaler(feature_range=(0,1)).fit_transform(X)
    X = X.astype(theano.config.floatX)
    y = y.astype('int32')
    nclasses = np.unique(y).shape[0]
    nfeatures = X.shape[1]

    X_train, y_train, X_valid, y_valid, X_test, y_test = \
        train_valid_test_split(X, y,
                               test_prop=test_prop, valid_prop=valid_prop)

    if test:
        X = X_train[100:200]
        y = y_train[100:200]
        X_train = X_train[:100]
        y_train = y_train[:100]
        X_valid = X_valid[:10]
        y_valid = y_valid[:10]
        X_test = X_test[:50]
        y_test = y_test[:50]

    return dict(
        X_train=theano.shared(X_train),
        y_train=theano.shared(y_train),
        X_valid=theano.shared(X_valid),
        y_valid=theano.shared(y_valid),
        X_test=theano.shared(X_test),
        y_test=theano.shared(y_test),
        num_examples_train=X_train.shape[0],
        num_examples_valid=X_valid.shape[0],
        num_examples_test=X_test.shape[0],
        input_dim=nfeatures,
        output_dim=nclasses,
        labels=labels
    )
def load_real_train_data():
    """
    :return: numpy array of real MNIST images
    """
    (trainX, trainy), (_, _) = load_data()  # Load MNIST data
    # normalize:
    d0, d1, d2 = trainX.shape
    trainX = MinMaxScaler(feature_range=(-1, 1)).fit_transform(
        trainX.reshape(d0, d1 * d2)).reshape(d0, d1, d2)
    trainX = np.expand_dims(trainX.astype('float32'), axis=-1)

    return trainX
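
# Quick check of load_real_train_data; load_data is assumed to be
# keras.datasets.mnist.load_data, as the docstring implies.
real_images = load_real_train_data()
print(real_images.shape, real_images.min(), real_images.max())
# expect roughly (60000, 28, 28, 1) with values scaled into [-1, 1]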
Example #10
    def __getitem__(self, idx):
        # read sound samples from file
        sound_samples, sampling_rate, label = Sound.read_sound(self, idx)

        mel = librosa.feature.melspectrogram(y=sound_samples, sr=sampling_rate,
                                             n_fft=self.nfft,
                                             hop_length=self.hop_len,
                                             n_mels=self.mels)
        mel = librosa.power_to_db(mel, ref=np.max)
        if self.truncate:
            mel = adjust_matrix(mel, 2**closest_power_2(mel.shape[0]),
                                2**closest_power_2(mel.shape[1]))

        initial_shape = mel.shape
        mel_scaled_spectrogram_db = MinMaxScaler().fit_transform(
            mel.reshape(-1, 1)).reshape((1, *initial_shape))
        mel_scaled_spectrogram_db = mel_scaled_spectrogram_db.astype(
            np.float32)

        return [mel_scaled_spectrogram_db], label
def grid_search(file):
    df = pd.read_table(file, lineterminator='\n', sep='\t')
    col = list(df.columns.values)

    df[col[1:-1]] = df[col[1:-1]].astype(float)
    df[col[-1]] = df[col[-1]].astype(int)
    X = df[col[1:-1]].values
    X = MinMaxScaler(feature_range=(0.0, 1.0)).fit_transform(X)
    X = X.astype(float, order='C')
    Y = df[col[-1]].values
    Y = Y.astype(float, order='C')

    parameters = {
        'C': [0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0],
        'solver': ['lbfgs'],
        'max_iter': [100, 250, 500, 750, 1000, 1500, 2500]
    }
    lr = log_reg(penalty='l2', class_weight='balanced')
    model = GridSearchCV(lr, parameters, cv=5, scoring='roc_auc', n_jobs=5)
    model.fit(X, Y)
    return model.best_params_, df
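
# Hypothetical invocation of grid_search; 'features.tsv' is a placeholder for
# a tab-separated file with an id column, float feature columns, and an
# integer label in the last column. log_reg is assumed to be sklearn's
# LogisticRegression, as the parameter grid suggests.
best_params, scored_df = grid_search('features.tsv')
print(best_params)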
    def get_item(self, idx):
        """ Function for getting periodogram """
        # read sound samples from file
        sound_samples, sampling_rate, labels = Sound.read_sound(self,
                                                                idx=idx,
                                                                raw=True)

        periodogram = abs(np.fft.rfft(sound_samples, sampling_rate))[1:]
        if self.scale_db:
            periodogram = 20 * np.log10(
                periodogram / np.iinfo(sound_samples[0]).max)
        frequencies = np.fft.rfftfreq(sampling_rate,
                                      d=(1. / sampling_rate))[1:]

        if self.slice_freq:
            periodogram = periodogram[self.slice_freq[0]:self.slice_freq[1]]
            frequencies = frequencies[self.slice_freq[0]:self.slice_freq[1]]

        if self.scale:
            periodogram = MinMaxScaler().fit_transform(
                periodogram.reshape(-1, 1)).squeeze()

        periodogram = periodogram.astype(np.float32)
        return (periodogram, frequencies), labels
Example #13
cD3.fill(0)
for i in range(1, len(coeffs) - 3):
    coeffs[i] = pywt.threshold(coeffs[i], threshold)
rdata = pywt.waverec(coeffs=coeffs, wavelet='db5')
print("="*30)
print("showing your ecgdata")
plt.figure(figsize=(20,4))
plt.subplot(3,1,1)
plt.plot(data)
plt.title("raw data")
plt.subplot(3,1,2)
plt.plot(rdata)
plt.title("new data")
plt.savefig('D:\\anaconda3\\envs\\myTensorflow\\ECG\\Tang\\ecgtest3.png')
plt.show()

print("="*30)
print("analysing your ecgdata using ECGNet")
tt = np.array(rdata).reshape((5000,1))
tt = MinMaxScaler(feature_range=(0,1)).fit_transform(tt)
interpreter.allocate_tensors()
inputIndex=interpreter.get_input_details()[0]["index"]
outputIndex = interpreter.get_output_details()[0]["index"]
tt = tt.reshape((-1,5000,1,1))
tt = tt.astype(np.float32)
interpreter.set_tensor(inputIndex, tt)
interpreter.invoke()
prediction = interpreter.get_tensor(outputIndex)[0]
print(prediction)
print("so far,you are healthy. keep exercising and stay fit.")
                    layers[i+1] = tf.matmul(layers[i],w)+b
        
        # create phases
        mid_idx = int((len(w_dict)-1) / 2)
        n_phases = mid_idx
        phase_training_ops = []

        for phase_idx in range(n_phases):
            pass

if __name__ == '__main__':
    tf.reset_default_graph()
    from keras.datasets.mnist import load_data
    # keras' mnist load_data returns (x_train, y_train), (x_test, y_test)
    (xtrain, ytrain), (xtest, ytest) = load_data()
    from sklearn.preprocessing import StandardScaler, MinMaxScaler
    xtrain = MinMaxScaler((0, 1)).fit_transform(
        xtrain.astype('float64').reshape(xtrain.shape[0], -1))


    h_layers = [300, 150, 300]
    act_fn = 'elu'
    lr = 1e-2
    l2_pen = 1e-4
    sae = tfStackedAutoEncoder(h_layers=h_layers)
    layers = sae.fit(xtrain)






Example #15
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import theanets
import climate

climate.enable_default_logging()

X_orig = np.load('/Users/bzamecnik/Documents/music-processing/music-processing-experiments/c-scale-piano_spectrogram_2048_hamming.npy')
sample_count, feature_count = X_orig.shape
X = MinMaxScaler().fit_transform(X_orig)
X = X.astype(np.float32)

X_train, X_test = train_test_split(X, test_size=0.4, random_state=42)
X_val, X_test = train_test_split(X_test, test_size=0.5, random_state=42)

# (np.maximum(0, 44100/512*np.arange(13)-2)).astype('int')
#blocks = [0, 84, 170, 256, 342, 428, 514, 600, 687, 773, 859, 945, 1031, 1205]
blocks = [0, 48, 98, 148, 198, 248, 298, 348, 398, 448, 498, 548, 598, 700]

def make_labels(blocks):
    label_count = len(blocks) - 1
    labels = np.zeros(blocks[-1])
    for i in range(label_count):
        labels[blocks[i]:blocks[i+1]] = i
    return labels

y = make_labels(blocks)

def score(exp, Xs):
    X_train, X_val, X_test = Xs
Example #16
def vad(data, fs, fs_vad=16000, hop_length=30, vad_mode=0):
    """ Voice activity detection.
    This was implemented for easier use of py-webrtcvad.
    Parameters
    ----------
    data : ndarray
        numpy array of mono (1 ch) speech data.
        1-d or 2-d; if 2-d, shape must be (1, time_length) or (time_length, 1).
        If the data type is int, values must lie in [-32768, 32767].
        If the data type is float, values are expected to lie in [-1, 1].
    fs : int
        Sampling frequency of data.
    fs_vad : int, optional
        Sampling frequency for webrtcvad.
        fs_vad must be 8000, 16000, 32000 or 48000.
        Default is 16000.
    hop_length : int, optional
        Step size in milliseconds.
        hop_length must be 10, 20, or 30.
        Default is 30.
    vad_mode : int, optional
        set vad aggressiveness.
        As vad_mode increases, it becomes more aggressive.
        vad_mode must be 0, 1, 2 or 3.
        Default is 0.

    Returns
    -------
    vact : ndarray
        Voice activity mask with the same length as the input data:
        0 marks an unvoiced sample, 1 marks a voiced sample.
    """

    # check argument
    if fs_vad not in [8000, 16000, 32000, 48000]:
        raise ValueError('fs_vad must be 8000, 16000, 32000 or 48000.')

    if hop_length not in [10, 20, 30]:
        raise ValueError('hop_length must be 10, 20, or 30.')

    if vad_mode not in [0, 1, 2, 3]:
        raise ValueError('vad_mode must be 0, 1, 2 or 3.')

    # check data
    if data.dtype.kind == 'i':
        if data.max() > 2**15 - 1 or data.min() < -2**15:
            raise ValueError(
                'When data.dtype is int, data must be in [-32768, 32767].')
        data = data.astype('f') / 2.0**15

    elif data.dtype.kind == 'f':
        if np.abs(data).max() > 1:
            # values loaded with librosa.load() can be slightly greater than 1.0
            data = MinMaxScaler(
                (-1, 1)).fit_transform(data.reshape(-1, 1)).reshape(-1)
            # raise ValueError(
            #     'When data.type is float, data must be -1.0 <= data <= 1.0.')
        data = data.astype('f')

    else:
        raise ValueError('data.dtype must be int or float.')

    data = data.squeeze()
    if not data.ndim == 1:
        raise ValueError('data must be mono (1 ch).')

    # resampling
    if fs != fs_vad:
        resampled = resample(data, fs, fs_vad)
        if np.abs(resampled).max() > 1.0:
            resampled *= (0.99 / np.abs(resampled).max())
            warn('Resampling causes data clipping. data was rescaled.')

    else:
        resampled = data

    resampled = (resampled * 2.0**15).astype('int16')

    hop = fs_vad * hop_length // 1000
    framelen = resampled.size // hop + 1
    padlen = framelen * hop - resampled.size
    padded = np.lib.pad(resampled, (0, padlen), 'constant', constant_values=0)
    framed = frame(padded, frame_length=hop, hop_length=hop).T

    vad = webrtcvad.Vad()
    vad.set_mode(vad_mode)
    valist = [vad.is_speech(tmp.tobytes(), fs_vad) for tmp in framed]

    hop_origin = fs * hop_length // 1000
    va_framed = np.zeros([len(valist), hop_origin])
    va_framed[valist] = 1

    return va_framed.reshape(-1)[:data.size]
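
# Usage sketch for vad; it assumes webrtcvad is installed and that the
# resample and frame helpers referenced inside vad are importable. Using
# fs equal to fs_vad keeps the resampling branch out of the example.
import numpy as np

fs = 16000
rng = np.random.default_rng(0)
noise = (0.1 * rng.standard_normal(fs)).astype('float32')  # one second of noise
vact = vad(noise, fs, fs_vad=16000, hop_length=30, vad_mode=3)
print(vact.shape)  # same number of samples as the input signal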