def __init__(self, xs, ys, args, xs_pair=None, moco=True):
    """Store the samples and pre-compute their augmented views.

    Three augmenters of the family selected by ``args.augment_type`` are
    always built (query, key and train; seeds 10, 20 and 30) and kept on
    ``self``. Which of them is applied right away depends on ``moco``.

    Args:
        xs: Input series; ``xs[0]`` must support ``len()`` (used as the
            sample length).
        ys: Labels, stored unchanged on ``self.ys``.
        args: Object exposing ``augment_type``, one of ``'warp'``,
            ``'noise'``, ``'drift'``, ``'crop'``, ``'dropout'`` or
            ``'dropout_0.2'``.
        xs_pair: Series used to build the key view; required when ``moco``
            is true.
        moco: When true, pre-compute ``d_aug_query`` from ``xs`` and
            ``d_aug_key`` from ``xs_pair``; otherwise pre-compute
            ``d_aug_train`` from ``xs``.

    Raises:
        ValueError: If ``moco`` is true but ``xs_pair`` is ``None``, or if
            ``args.augment_type`` is not one of the supported names.
    """
    # Explicit raise rather than `assert`, which disappears under `python -O`.
    if moco and xs_pair is None:
        raise ValueError("xs_pair must be provided when moco=True")

    self.xs = xs
    self.xs_pair = xs_pair
    self.ys = ys
    self.is_moco = moco
    self.sample_len = len(self.xs[0])

    # One factory per augmentation family; the seed is the only thing that
    # differs between the query, key and train augmenters.
    factories = {
        'warp': lambda seed: TimeWarp(seed=seed) * 1,  # random time warping
        'noise': lambda seed: AddNoise(scale=0.01, seed=seed),
        'drift': lambda seed: Drift(max_drift=0.7, n_drift_points=5, seed=seed),
        # Crop window is 5% of the sample length.
        'crop': lambda seed: Crop(size=int(self.sample_len * 0.05), seed=seed),
        'dropout': lambda seed: Dropout(p=0.1, fill=0, seed=seed),
        'dropout_0.2': lambda seed: Dropout(p=0.2, fill=0, seed=seed),
    }
    build = factories.get(args.augment_type)
    if build is None:
        # Previously an unknown type fell through silently and surfaced later
        # as an AttributeError on the missing augmenter attribute.
        raise ValueError(
            "unsupported augment_type {!r}; expected one of {}".format(
                args.augment_type, sorted(factories)
            )
        )

    self.aug_for_query_moco = build(10)
    self.aug_for_key_moco = build(20)
    self.aug_train = build(30)

    if self.is_moco:
        self.d_aug_query = self.aug_for_query_moco.augment(xs)
        self.d_aug_key = self.aug_for_key_moco.augment(xs_pair)
    else:
        self.d_aug_train = self.aug_train.augment(xs)
def test_pipe():
    """Chain augmenters into a pipeline, run it on the fixtures, then swap one stage.

    The pipeline is expected to hold six augmenters; stage 3 is replaced by a
    ``Resize`` whose attributes are then mutated through the original handle
    and read back through the pipeline.
    """
    half_len = int(T / 2)
    third_len = int(T / 3)

    noise_stage = AddNoise() * 2 @ 0.5
    crop_drift_stage = Crop(size=half_len) * 2 + Drift()
    dropout_pool_stage = Dropout() @ 0.5 + Pool()
    quantize_stage = Quantize() * 2
    augmenter = (
        noise_stage + crop_drift_stage + dropout_pool_stage + quantize_stage
    )

    # Every fixture combination, in the order the pipeline must see them.
    fixture_calls = [
        (X1,),
        (X1, Y1),
        (X2,),
        (X2, Y2),
        (X2, Y3),
        (X3,),
        (X3, Y2),
        (X3, Y3),
    ]
    for call_args in fixture_calls:
        augmenter.augment(*call_args)
    augmenter.summary()

    assert len(augmenter) == 6

    exchange = Resize(size=half_len) * 2 @ 0.5
    augmenter[3] = exchange
    assert augmenter[3] is exchange

    # Mutations on the handle must be visible through the pipeline slot.
    exchange.resize = third_len
    exchange.repeats = 3
    exchange.prob = 0.4
    assert isinstance(augmenter[3], Resize)
    for attr, expected in (("resize", third_len), ("repeats", 3), ("prob", 0.4)):
        assert getattr(augmenter[3], attr) == expected
def augment_tsaug(filename):
    """Write a tsaug-augmented copy of an audio file and return its path.

    The audio is loaded without down-mixing, cropped to a random whole-second
    length, optionally perturbed with noise, and partially zeroed out. The
    result is written beside the input as ``tsaug_<original basename>``.

    Args:
        filename: Path of the audio file to augment.

    Returns:
        A one-element list containing the path of the written file.

    Raises:
        ValueError: If the clip lasts less than two whole seconds, so that no
            crop length between 1 and ``duration - 1`` seconds exists.
    """
    import os  # local import: the block stays valid without touching file imports

    y, sr = librosa.load(filename, mono=False)
    # Keyword arguments: recent librosa releases reject positional y/sr here.
    duration = int(librosa.core.get_duration(y=y, sr=sr))
    print(y.shape)
    # NOTE(review): y is passed to tsaug exactly as librosa returns it; confirm
    # that layout is what tsaug expects for multi-channel audio.

    if duration < 2:
        raise ValueError(
            "audio must be at least 2 seconds long to pick a crop length, "
            "got {} s: {!r}".format(duration, filename)
        )
    # Crop length in whole seconds, between 1 second and duration - 1 seconds.
    splice = random.randint(1, duration - 1)

    my_augmenter = (
        Crop(size=sr * splice) * 5  # random crop subsequences of splice seconds
        + AddNoise(scale=(0.01, 0.05)) @ 0.5  # with 50% probability, add random noise up to 1% - 5%
        + Dropout(
            p=0.1, fill=0, size=[int(0.001 * sr), int(0.01 * sr), int(0.1 * sr)]
        )  # drop out 10% of the time points (dropped out units are 1 ms, 10 ms, or 100 ms) and fill the dropped out points with zeros
    )
    y_aug = my_augmenter.augment(y)

    # Prefix only the basename so a path such as "clips/a.wav" becomes
    # "clips/tsaug_a.wav" rather than "tsaug_clips/a.wav".
    folder, base = os.path.split(filename)
    newfile = os.path.join(folder, 'tsaug_' + base)
    sf.write(newfile, y_aug.T, sr)
    return [newfile]
def test_crop_X3():
    """Crop a 3-D input: two series of length 100 with two channels each.

    Channel 0 counts upwards and channel 1 is its negation; three crops of
    length 5 are taken from each series.
    """
    grid = np.arange(200).reshape(2, -1)
    X = np.stack([grid, -1 * grid], axis=2)

    starts = [[0, 90, 20], [10, 30, 15]]
    cropper = Crop(crop_start=starts, crop_size=5)
    X_crops = cropper.run(X)

    assert X_crops.shape == (6, 5, 2)
    # Crop 3 is the first crop of the second series (starts at 100 + 10).
    assert np.array_equal(X_crops[3, :, 0], np.arange(110, 115))
    assert np.array_equal(X_crops[1, :, 1], -np.arange(90, 95))
def test_crop_X1():
    """Crop a 1-D input of length 100 at three start offsets."""
    X = np.arange(100)
    cropper = Crop(crop_start=[[0, 90, 20]], crop_size=5)
    X_crops = cropper.run(X)

    assert X_crops.shape == (3, 5)
    # Rows 0 and 1 must begin at the first two requested offsets.
    for row, start in ((0, 0), (1, 90)):
        assert np.array_equal(X_crops[row, :], np.arange(start, start + 5))
def test_crop_X2():
    """Crop a 2-D input: two series of length 100, three crops per series."""
    X = np.arange(200).reshape(2, -1)
    starts = [[0, 90, 20], [10, 30, 15]]
    cropper = Crop(crop_start=starts, crop_size=5)
    X_crops = cropper.run(X)

    assert X_crops.shape == (6, 5)
    # Crop 3 is the first crop of the second series (starts at 100 + 10).
    assert np.array_equal(X_crops[3, :], np.arange(110, 115))
    assert np.array_equal(X_crops[1, :], np.arange(90, 95))
AddNoise, Convolve, Crop, Drift, Dropout, Pool, Quantize, Resize, Reverse, TimeWarp, ) augmenters = [ AddNoise(), Convolve(size=(7, 10)) * 10, Crop(size=10), Drift(), Dropout(), Pool(size=[2, 4, 8]) * 10, Quantize(n_levels=[10, 20, 30]) * 10, Reverse() @ 0.5 * 10, TimeWarp(), ] N = 10 T = 1000 C = 3 L = 2 M = 4 X1 = np.random.uniform(size=T)
# Build a synthetic sample from the cleaned ECX EUA price history by running
# the Open/High/Low/Settle columns through a tsaug augmentation pipeline and
# writing the result to a header-less CSV.
from tsaug import TimeWarp, Crop, Quantize, Drift, Reverse
from pandas import read_csv, DataFrame

dataset = read_csv('CLEANED_ECX_EUA_.csv', delimiter=",", engine='python')
dataset.columns = [
    'Date', 'Open', 'High', 'Low', 'Settle', 'Change', 'CloseToPredict'
]
dataset = dataset.drop(columns=['Date', 'Change', 'CloseToPredict'])

# One row per price column, one entry per time step. This must be a transpose:
# reshaping the row-major (time, column) array to (column, time) would mix
# values from different columns into every row.
X = dataset.to_numpy().T
print(len(X))
print(X.shape)

# NOTE(review): with a 2-D input each row is presumably augmented as its own
# series, so output rows may no longer be time-aligned across columns; confirm
# against the tsaug documentation if alignment matters.
my_augmenter = (
    TimeWarp() * 1
    + Crop(size=3000)
    + Quantize(n_levels=[10, 20, 30])
    + Drift(max_drift=(0.1, 0.5)) @ 0.8
    + Reverse() @ 0.5
)
X_aug = my_augmenter.augment(X)
print(X)
print(X_aug)

# Back to one row per time step and one column per price field.
test = X_aug.T
new = DataFrame(test)
print(new)
new.to_csv("SYNTHETIC_DATA_3K_SAMPLE.csv", index=False, header=False)
AddNoise(distr="laplace", seed=0), AddNoise(distr="uniform", seed=0), AddNoise(kind="multiplicative", seed=0), AddNoise(per_channel=False, normalize=False, seed=0), Convolve(repeats=M, prob=0.5, seed=0), Convolve(window=["hann", "blackman", ("gaussian", 1)], seed=0), Convolve( window=["hann", "blackman", ("gaussian", 1)], per_channel=True, seed=0 ), Convolve(window=("gaussian", 1), seed=0), Convolve(size=(7, 11), seed=0), Convolve(size=(7, 11), per_channel=True, seed=0), Convolve(size=[7, 11], seed=0), Convolve(size=[7, 11], per_channel=True, seed=0), Convolve(per_channel=True, seed=0), Crop(size=int(T / 2), repeats=M, seed=0), Crop(size=(int(T / 3), T), resize=int(T / 2), seed=0), Crop(size=[int(T / 3), T], resize=int(T / 2), seed=0), Drift(repeats=M, prob=0.5, seed=0), Drift(max_drift=(0.5, 1.0), seed=0), Drift(n_drift_points=[3, 8], seed=0), Drift(kind="multiplicative", seed=0), Drift(per_channel=False, normalize=False, seed=0), Dropout(repeats=M, prob=0.5, seed=0), Dropout(p=(0.01, 0.1), size=(1, 5), seed=0), Dropout(p=[0.01, 0.02, 0.03], size=[1, 2, 3], seed=0), Dropout(fill="bfill", seed=0), Dropout(fill="mean", seed=0), Dropout(fill=0, seed=0), Dropout(per_channel=True, seed=0), Pool(repeats=M, prob=0.5, seed=0),