def generate_tracks_classification(n, dimensions, min_T=5, max_T=1001):
    """
    Generate tracks for training a classification model.

    Parameters:
        n: number of tracks to generate
        dimensions: number of dimensions (currently only supports 1 and 2)
        min_T: minimum track length
        max_T: maximum track length (e.g. 1001 generates tracks up to 1000 steps)

    Returns:
        tracks_array: numpy array of shape [n, max_T, dimensions] with the tracks
        classes: numpy array of length n with the model class of each track
            (see the andi_datasets package)
    """
    # Re-seed so repeated/parallel calls do not produce duplicated data
    np.random.seed()
    generator = andi.andi_datasets()
    data = generator.andi_dataset(N=n, min_T=min_T, max_T=max_T,
                                  tasks=[2], dimensions=[dimensions])
    dim_idx = dimensions - 1
    classes = np.array(data[3][dim_idx]).astype(int)  # Y2: task-2 labels
    # Pad/stack the variable-length tracks into one fixed-size array
    tracks_array = package_tracks(tracks=data[2][dim_idx],  # X2: task-2 tracks
                                  max_T=max_T, dimensions=dimensions)
    return tracks_array, classes
def plot_results(self, alpha):
    """
    Plot prediction-error diagnostics for the exponent regression.

    Parameters:
        alpha: tensor with one row per trajectory; the columns used here are
            0: predicted exponent, 1: true exponent, 2: trajectory length,
            3: model class id, 4: noise ratio
            (column semantics inferred from usage — TODO confirm with caller)

    Returns:
        fig: matplotlib figure with MAE vs trajectory length (one curve per
            model class) and MAE vs noise ratio.
    """
    models = torch.unique(alpha[:, 3])
    fig = plt.figure(figsize=(10, 10), dpi=100)
    ax1 = fig.add_subplot(211)
    ax1.set_xlabel("Trajectory length")
    ax1.set_ylabel("Mean absolute error")
    ax1.set_title("Prediction of exponent")
    for m in models:
        mask = alpha[:, 3] == m
        x, y = smooth_point_cloud(
            alpha[mask, 2].detach().cpu().numpy(),
            torch.abs(alpha[mask, 0] - alpha[mask, 1]).detach().cpu().numpy())
        # Narrowed from a bare `except:` which would also swallow
        # KeyboardInterrupt/SystemExit; an out-of-range model id falls back
        # to the aggregate label.
        try:
            label = andi.andi_datasets().avail_models_name[int(m)]
        except Exception:
            label = "all models"
        ax1.plot(x, y, label=label)
    ax1.legend()
    ax2 = fig.add_subplot(212)
    ax2.set_xlabel("Noise ratio")
    ax2.set_ylabel("Mean absolute error")
    x, y = smooth_point_cloud(
        alpha[:, 4].detach().cpu().numpy(),
        torch.abs(alpha[:, 0] - alpha[:, 1]).detach().cpu().numpy())
    ax2.plot(x, y)
    plt.tight_layout()
    return fig
def generate_tracks_regression(n, dimensions, min_T=5, max_T=1001):
    """
    Generate tracks for training a regression model on the anomalous exponent.

    Returns a [n, max_T, dimensions] array of right-aligned (zero left-padded),
    preprocessed tracks plus the exponent of each track.
    """
    data = andi.andi_datasets().andi_dataset(
        N=n, min_T=min_T, max_T=max_T, tasks=[1], dimensions=[dimensions])
    dim_idx = dimensions - 1
    exponents = np.array(data[1][dim_idx])  # Y1: task-1 exponents
    tracks = data[0][dim_idx]               # X1: task-1 tracks
    # Right-align each variable-length track inside a fixed-size array
    tracks_array = np.zeros([n, max_T, dimensions])
    for i, t in enumerate(tracks):
        if dimensions == 1:
            tracks_array[i, max_T - len(t):, 0] = t
        elif dimensions == 2:
            half = int(len(t) / 2)  # 2-D tracks arrive as x then y concatenated
            tracks_array[i, max_T - half:, 0] = t[:half]
            tracks_array[i, max_T - half:, 1] = t[half:]
    return preprocess_tracks(tracks_array), exponents
def generate_tracks_segmentation(n, dimensions):
    """
    Generate tracks for training a segmentation model (all of length 200).

    Parameters:
        n: number of tracks to generate
        dimensions: number of dimensions (currently only supports 1 and 2)

    Returns:
        tracks_array: numpy array of shape [n, 200, dimensions] with the tracks
        positions: numpy array of length n with the switch point of each track
    """
    # Re-seed so repeated/parallel calls do not produce duplicated data
    np.random.seed()
    data = andi.andi_datasets().andi_dataset(
        N=n, tasks=[3], dimensions=[dimensions], min_T=200, max_T=201)
    dim_idx = dimensions - 1
    # Column 1 of the task-3 labels is the switch time; shift to 0-based index
    positions = np.array(data[5][dim_idx])[:, 1].astype(int) - 1
    tracks_array = package_tracks(tracks=data[4][dim_idx],
                                  max_T=200, dimensions=dimensions)
    return tracks_array, positions
def generate_tracks_regression(n, dimensions, min_T=5, max_T=1001):
    """
    Generate tracks for training a regression model.

    Parameters:
        n: number of tracks to generate
        dimensions: number of dimensions (currently only supports 1 and 2)
        min_T: minimum track length
        max_T: maximum track length (e.g. 1001 generates tracks up to 1000 steps)

    Returns:
        tracks_array: numpy array of shape [n, max_T, dimensions] with the tracks
        exponents: numpy array of length n with the anomalous exponent of each track
    """
    np.random.seed()  # prevents data duplication
    data = andi.andi_datasets().andi_dataset(
        N=n, min_T=min_T, max_T=max_T, tasks=[1], dimensions=[dimensions])
    dim_idx = dimensions - 1
    exponents = np.array(data[1][dim_idx])  # Y1: task-1 exponents
    tracks_array = package_tracks(tracks=data[0][dim_idx],  # X1: task-1 tracks
                                  max_T=max_T, dimensions=dimensions)
    return tracks_array, exponents
def example_trajs():
    """
    Generate one example trajectory per ANDI model and save each as a PDF
    scatter/line plot under data/part0/example_traj.
    """
    print('Generowanie i zapisywanie przykładowych trajektorii...')
    path = 'data/part0/example_traj'
    dirmake(path)
    logg('Generowanie przykładowych trajektorii - start')
    AD = andi.andi_datasets()
    for model in range(5):
        # Some models reject subdiffusive exponents; retry with a
        # superdiffusive one. Narrowed from a bare `except:` that would
        # also swallow KeyboardInterrupt/SystemExit.
        try:
            dataset = AD.create_dataset(100, 1, [0.7], [model], 2)
        except Exception:
            dataset = AD.create_dataset(100, 1, [1.7], [model], 2)
        # Row layout: [model, exponent, x_0..x_99, y_0..y_99]
        x = dataset[0][2:102]
        y = dataset[0][102:]
        plt.figure(figsize=(2, 2))
        plt.cla()
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(AD.avail_models_name[model], loc='left')
        plt.plot(x, y, color=colors[model], linewidth=2, alpha=0.5)
        # Color the points by time index so the direction of motion is visible
        plt.scatter(x, y, c=range(len(x)), cmap=color_maps[model],
                    marker='.', s=100)
        plt.savefig(path + '/' + str(AD.avail_models_name[model]) + '.pdf',
                    transparent=True, bbox_inches='tight', dpi=300)
    logg('Generowanie przykładowych trajektorii - stop')
    print(' --- ZAKOŃCZONO')
def generate_balanced_dataset(N, dimensions, save, save_path):
    """
    Simple wrapper for generation of a balanced dataset for task 1 in the
    ANDI challenge.

    :param N: int, number of trajectories
    :param dimensions: list of ints, dimensions for which to generate datasets
    :param save: bool, whether to save data
    :param save_path: string, the directory for saving data
    :return X1, Y1, X2, Y2, X3, Y3: numpy arrays, X and Y data in 3 dimensions
        (if a dimension was not requested, that array is empty)
    """
    parent_dir = os.path.dirname(os.getcwd())
    target_dir = os.path.join(parent_dir, save_path)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # andi_dataset returns the (X1, Y1, X2, Y2, X3, Y3) tuple directly
    return andi.andi_datasets().andi_dataset(
        N=N, tasks=1, dimensions=dimensions,
        save_dataset=save, path_datasets=target_dir)
def generate_tracks_segmentation(n, dimensions):
    """
    Generate fixed-length (200-step) tracks for training a segmentation model.

    Returns the preprocessed [n, 200, dimensions] track array and, for each
    track, the index of the point where the underlying model switches.
    """
    data = andi.andi_datasets().andi_dataset(N=n, tasks=[3],
                                             dimensions=[dimensions])
    dim_idx = dimensions - 1
    # Column 1 of the task-3 labels is the switch time; shift to 0-based index
    positions = np.array(data[5][dim_idx])[:, 1].astype(int) - 1
    tracks = data[4][dim_idx]
    tracks_array = np.zeros([n, 200, dimensions])
    for i, t in enumerate(tracks):
        if dimensions == 1:
            tracks_array[i, :, 0] = t
        elif dimensions == 2:
            half = int(len(t) / 2)  # 2-D tracks arrive as x then y concatenated
            tracks_array[i, :, 0] = t[:half]
            # Re-anchor the y component at its first value
            tracks_array[i, :, 1] = t[half:] - t[half]
    return preprocess_tracks(tracks_array), positions
def make_dir(path):
    """Create *path* (and any missing parents) if it does not already exist."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(path, exist_ok=True)


# Script body: generate 1-D task-1 trajectories of length `l` (defined
# elsewhere in this file, as is `N`) and dump them to a semicolon-separated
# CSV with one "positions;label" row per track.
data_path = './origin_data/'
pp_data_path = './pp_data/'
make_dir(data_path)
make_dir(pp_data_path)
filename = data_path + 'data-1d-{}.csv'.format(l)
output = pp_data_path + 'data-1d-{}-pp.csv'.format(l)
AD = andi.andi_datasets()
X1, Y1, X2, Y2, X3, Y3 = AD.andi_dataset(N=N, max_T=l + 1, min_T=l,
                                         tasks=1, dimensions=1)
with open(filename, 'w') as f:
    f.write('pos;label\n')
    for i in range(len(X1[0])):
        f.write(','.join(str(j) for j in X1[0][i]))
        f.write(';' + str(Y1[0][i]) + '\n')
# The `with` block closes the file; the original's redundant f.close() was
# removed. Free the large arrays before any further allocations.
del X1, Y1
gc.collect()
def create_custom_dataset(N, max_T=1000, min_T=10, dimensions=(1, 2, 3), save=True):
    """
    Create a normalized, noisy, randomly-rescaled custom ANDI dataset.

    For each requested dimension a dataframe with columns
    ['dim', 'model', 'exp', 'x', 'len'] is built (and optionally pickled to
    DATA_PATH / "custom<dim>.pkl").

    Parameters:
        N: approximate number of trajectories per model
        max_T: generated trajectory length before random cutting
        min_T: minimum length after random cutting
        dimensions: iterable of dimensions to generate (default changed from
            a mutable list literal to a tuple; callers are unaffected)
        save: whether to pickle each per-dimension dataframe

    Returns:
        df: the dataframe for the LAST dimension processed
    """
    ad = andi_datasets()
    exponents = np.arange(0.05, 2.01, 0.05)
    n_exp, n_models = len(exponents), len(ad.avail_models_name)
    # Trajectories per model and exponent. Arbitrarily chosen to fulfill
    # balanced classes.
    N_per_model = np.ceil(1.6 * N / 5)
    subdif, superdif = n_exp // 2, n_exp // 2 + 1
    num_per_class = np.zeros((n_models, n_exp))
    num_per_class[:2, :subdif] = np.ceil(N_per_model / subdif)  # ctrw, attm
    num_per_class[2, :] = np.ceil(N_per_model / (n_exp - 1))  # fbm
    num_per_class[2, exponents == 2] = 0  # fbm can't be ballistic
    num_per_class[3, subdif:] = np.ceil((N_per_model / superdif) * 0.8)  # lw
    num_per_class[4, :] = np.ceil(N_per_model / n_exp)  # sbm
    for dim in dimensions:
        dataset = ad.create_dataset(T=max_T, N=num_per_class,
                                    exponents=exponents, dimension=dim,
                                    models=np.arange(n_models))
        # Normalize trajectories (rows are [model, exponent, coords...])
        n_traj = dataset.shape[0]
        norm_trajs = normalize(dataset[:, 2:].reshape(n_traj * dim, max_T))
        dataset[:, 2:] = norm_trajs.reshape(dataset[:, 2:].shape)
        # Add localization error, Gaussian noise with sigma in [0.1, 0.5, 1]
        loc_error_amplitude = np.random.choice(np.array([0.1, 0.5, 1]),
                                               size=n_traj * dim)
        loc_error = (np.random.randn(n_traj * dim, int(max_T)).transpose()
                     * loc_error_amplitude).transpose()
        dataset = ad.create_noisy_localization_dataset(
            dataset, dimension=dim, T=max_T, noise_func=loc_error)
        # Add random diffusion coefficients: rescale the displacements and
        # rebuild each trajectory by cumulative summation.
        trajs = dataset[:, 2:].reshape(n_traj * dim, max_T)
        displacements = trajs[:, 1:] - trajs[:, :-1]
        diffusion_coefficients = np.random.randn(trajs.shape[0])
        new_displacements = (displacements.transpose()
                             * diffusion_coefficients).transpose()
        new_trajs = np.cumsum(new_displacements, axis=1)
        new_trajs = np.concatenate(
            (np.zeros((new_trajs.shape[0], 1)), new_trajs), axis=1)
        dataset[:, 2:] = new_trajs.reshape(dataset[:, 2:].shape)
        # Collect rows first and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and was O(n^2) anyway.
        rows = []
        for traj in dataset:
            mod, exp, x = int(traj[0]), traj[1], traj[2:]
            x = cut_trajectory(x, np.random.randint(min_T, max_T), dim=dim)
            x = tensor(x).view(dim, -1).T
            rows.append({'dim': dim, 'model': mod, 'exp': exp,
                         'x': x, 'len': len(x)})
        df = pd.DataFrame(rows, columns=['dim', 'model', 'exp', 'x', 'len'],
                          dtype=object)
        if save:
            DATA_PATH.mkdir(exist_ok=True)
            ds_path = DATA_PATH / f"custom{dim}.pkl"
            df.to_pickle(ds_path, protocol=pickle.HIGHEST_PROTOCOL)
    return df
def example_TAMSD():
    """
    Generate example TAMSD (time-averaged MSD) figures and save them as PDFs
    under data/part0/example_TAMSD: an estimated-vs-true TAMSD curve for one
    simulated trajectory (linear and log-log axes) plus idealized reference
    curves for three exponents. Plot labels/messages are intentionally kept
    in Polish.
    """
    print('Generowanie i zapisywanie przykładowych TAMSD...')
    path = 'data/part0/example_TAMSD'
    dirmake(path)
    logg('Generowanie przykładowych TAMSD - start')
    AD = andi.andi_datasets()
    # One 200-step 2-D fbm-like trajectory; row layout appears to be
    # [model, exponent, x_0..x_199, y_0..y_199] — TODO confirm with
    # andi_datasets docs.
    dataset = AD.create_dataset(200, 1, [0.7], [2], 2)
    x = dataset[0][2:202]
    y = dataset[0][202:]
    trajectory = [x, y]
    D, expo, expo_est, tamsds = TAMSD_estimation(trajectory, 0.7, 0, 'A')
    # Keep only the first 100 lags, then re-estimate the exponent on them
    tamsds = tamsds[:100]
    t = range(1, len(tamsds) + 1)
    expo_est = estimate_expo(t, tamsds, D, 100)
    # Estimated vs true TAMSD curve, linear axes
    plt.cla()
    plt.figure(figsize=(3, 3))
    plt.plot(t, tamsds, '.', label='punkty TAMSD')
    plt.plot(t, [4 * D * i**expo_est for i in t], 'b',
             label=r'Wyestymowana krzywa wzorcowa')
    plt.plot(t, [4 * D * i**expo for i in t], 'r',
             label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('c', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    # Same comparison on log-log axes
    plt.cla()
    plt.loglog(t, tamsds, '.', label='punkty TAMSD')
    plt.loglog(t, [4 * D * i**expo_est for i in t], 'b',
               label=r'Wyestymowana krzywa TAMSD')
    plt.loglog(t, [4 * D * i**expo for i in t], 'r',
               label=r'Prawdziwa krzywa wzorcowa')
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1))
    plt.title('d', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/TAMSD_loglog.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    # Idealized TAMSD curves (no noise) for three exponents
    plt.cla()
    D = 0.3
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    label = ['superdyfuzja', 'dyfuzja normalna', 'subdyfuzja']
    for expo in exps:
        # Negative index pairs each exponent with its diffusion-regime name
        plt.plot(t, [4 * D * i**expo for i in t],
                 color=colors[exps.index(expo)],
                 label=r'$\alpha=\ $' + str(expo) + ' - '
                 + label[-exps.index(expo) - 1])
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.legend(loc='lower left', bbox_to_anchor=(1.05, 1), ncol=3)
    plt.title('a', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    # Idealized TAMSD curves on log-log axes
    plt.cla()
    D = 1
    t = [0.1 * i for i in range(101)]
    exps = [0.7, 1, 1.3]
    for expo in exps:
        plt.loglog(t, [4 * D * i**expo for i in t],
                   color=colors[exps.index(expo)],
                   label=r'$\alpha=\ $' + str(expo))
    plt.xlabel('t')
    plt.ylabel(r'$\rho(t)$')
    plt.title('b', loc='left')
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.savefig(path + '/perfect_TAMSD_loglog.pdf', transparent=True,
                bbox_inches='tight', dpi=300)
    logg('Generowanie przykładowych TAMSD - stop')
    print(' --- ZAKOŃCZONO')