def encode_lightcurve_twed(cluster_lcs, cluster_times, patches_lcs, patches_times, alpha=1, split=True):
    """Encode light-curve patches by their TWED distance to each cluster.

    A ``len(patches_lcs) x len(cluster_lcs)`` matrix is filled with
    ``twed(patch, patch_times, cluster, cluster_times)`` and then mapped
    row by row to ``alpha * row_mean - distance``.

    Args:
        cluster_lcs: Indexable collection of cluster light curves.
        cluster_times: Time stamps matching ``cluster_lcs`` index by index.
        patches_lcs: Indexable collection of patch light curves.
        patches_times: Time stamps matching ``patches_lcs`` index by index.
        alpha: Factor applied to each row mean before the distances are
            subtracted from it.
        split: When true, return the positive part and the magnitude of
            the negative part side by side; otherwise return only the
            positive part.

    Returns:
        A pandas ``DataFrame`` with one row per patch. With
        ``split=False`` it has one column per cluster. With ``split=True``
        it is the index-wise merge of the two halves, so it has twice as
        many columns and the clashing column labels receive pandas'
        default merge suffixes (``_x`` / ``_y``).

    Note:
        ``twed``, ``twed_lambda`` and ``twed_nu`` are module-level names
        that are defined outside this function.
    """
    num_patches = len(patches_lcs)
    num_centroids = len(cluster_lcs)
    D = np.zeros((num_patches, num_centroids))
    for i in range(num_patches):
        # The patch is the same for the whole row, so look it up once.
        A, A_times = patches_lcs[i], patches_times[i]
        for j in range(num_centroids):
            B, B_times = cluster_lcs[j], cluster_times[j]
            D[i, j] = twed(A, A_times, B, B_times, lam=twed_lambda, nu=twed_nu)

    # Transform into cluster-distance space.
    df = pd.DataFrame(D)
    row_mean = df.mean(axis=1)  # axis=1: mean of each row (one per patch)
    df = df.div(-1)
    # Every value becomes alpha * row_mean - distance.
    df = df.add(alpha * row_mean, axis=0)

    if split:
        # Positive part: negatives are clamped to 0.
        df1 = df.apply(lambda x: np.maximum(0, x))
        # Negative part, sign-flipped: positives are clamped to 0.
        df2 = df.apply(lambda x: np.maximum(0, -x))
        df = df1.merge(df2, left_index=True, right_index=True)
    else:
        # Keep only the positive part: negatives are clamped to 0.
        df = df.apply(lambda x: np.maximum(0, x))
    return df
def pairwise_tweds(pairwise_tweds_file, lcs, times, part, num_parts, lam=0.5, nu=1e-5):
    """Compute one job's share of the pairwise TWED matrix.

    To parallelize over many jobs, the ``N * (N + 1) // 2`` pairwise
    distances (``N = len(lcs)``) are split into ``num_parts`` contiguous
    index ranges, and only the range belonging to ``part`` is evaluated.
    The last part also takes whatever remains after the integer split.

    Args:
        pairwise_tweds_file: Not used by this function; kept so existing
            callers keep working (a previously disabled variant wrote the
            result to an HDF5 file at this path).
        lcs: Indexable collection of light curves.
        times: Time stamps matching ``lcs`` index by index.
        part: Index of the current job, counted from 1 to ``num_parts``.
        num_parts: Total number of jobs the work is divided into.
        lam: Forwarded to ``twed`` as ``lam``.
        nu: Forwarded to ``twed`` as ``nu``.

    Returns:
        An ``(N, N)`` NumPy array. For each pair ``(i, j)`` handled by
        this part, ``D[i, j]`` and ``D[j, i]`` hold
        ``twed(...) * complexity_coeff(...)``; all other entries are 0.

    Note:
        The partition bounds are printed to standard output.
        ``get_coords``, ``twed`` and ``complexity_coeff`` are defined
        outside this function.
    """
    N = len(lcs)
    # Floor division keeps the counts integral on every Python version;
    # a float here would not be a valid range bound.
    num_tweds = N * (N + 1) // 2
    tweds_per_part = num_tweds // num_parts
    begin_index = (part - 1) * tweds_per_part
    # The last part absorbs the remainder of the integer split.
    if part == num_parts:
        end_index = num_tweds
    else:
        end_index = tweds_per_part * part

    print("N: {0}".format(N))
    print("num_tweds: {0}".format(num_tweds))
    print("tweds_per_part: {0}".format(tweds_per_part))
    print("begin_index: {0}".format(begin_index))
    print("end_index: {0}".format(end_index))
    print("part: {0}".format(part))
    print("num_parts: {0}".format(num_parts))

    D = np.zeros((N, N))
    for current_twed in range(begin_index, end_index):
        # The linear index is passed shifted by one; get_coords presumably
        # expects 1-based indexes -- confirm against its definition.
        i, j = get_coords(N, current_twed + 1)
        twed_val = twed(lcs[i], times[i], lcs[j], times[j], lam=lam, nu=nu)
        complex_coeff = complexity_coeff(lcs[i], times[i], lcs[j], times[j])
        final_val = twed_val * complex_coeff
        # The matrix is symmetric, so each pair is computed once and
        # written to both positions.
        D[i, j] = final_val
        D[j, i] = final_val
    return D