def delete_mixtures(params, nb_cpu, nb_gpu, use_gpu):
    """Flag templates that look like the sum of two other templates and remove them.

    For every template ``k`` assigned to this MPI rank, pairs ``(i, j)`` of
    templates whose best electrode is listed in ``edges`` for ``k``'s best
    electrode are tested.  A 2x2 linear system built from the stored overlaps
    yields amplitudes ``(a1, a2)``.  ``k`` is flagged as a mixture when both
    amplitudes lie within ``limits[i]`` / ``limits[j]``, when
    ``a1 * t_i + a2 * t_j`` correlates with ``t_k`` above the
    ``clustering/cc_mixtures`` threshold, and when ``t_i`` and ``t_j``
    correlate with each other below that same threshold.

    The function relies on a module-level ``comm`` object (``rank``, ``size``
    and ``Barrier`` are used).  Flagged templates are gathered with
    ``all_gather_array``; rank 0 then calls ``slice_templates`` /
    ``slice_clusters`` and deletes the temporary ``.overlap-mixtures.hdf5``
    file.

    Parameters
    ----------
    params : configuration object
        Read through ``nb_channels``, ``get``, ``getint`` and ``getfloat``,
        and forwarded to the loading helpers.
    nb_cpu, nb_gpu, use_gpu :
        Forwarded unchanged to ``get_overlaps`` and ``load_data_memshared``.

    Returns
    -------
    list
        ``[nb_temp, len(to_remove)]`` where ``nb_temp`` is half the number of
        columns of the templates matrix.
    """
    N_total = params.nb_channels
    N_t = params.getint('detection', 'N_t')
    cc_merge = params.getfloat('clustering', 'cc_mixtures')
    mixtures = []

    filename = params.get('data', 'file_out_suff') + '.overlap-mixtures.hdf5'
    norm_templates = load_data(params, 'norm-templates')
    best_elec = load_data(params, 'electrodes')
    limits = load_data(params, 'limits')
    nodes, edges = get_nodes_and_edges(params)
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)

    # The overlaps are (re)computed here; the returned handle is closed right
    # away and the data is read back through load_data* below.
    overlap = get_overlaps(
        params, extension='-mixtures', erase=True, normalize=False,
        maxoverlap=False, verbose=False, half=True,
        use_gpu=use_gpu, nb_cpu=nb_cpu, nb_gpu=nb_gpu
    )
    overlap.close()

    SHARED_MEMORY = get_shared_memory_flag(params)
    if SHARED_MEMORY:
        c_overs = load_data_memshared(
            params, 'overlaps', extension='-mixtures',
            use_gpu=use_gpu, nb_cpu=nb_cpu, nb_gpu=nb_gpu
        )
        templates = load_data_memshared(params, 'templates', normalize=False)
    else:
        c_overs = load_data(params, 'overlaps', extension='-mixtures')
        templates = load_data(params, 'templates')

    _, N_tm = templates.shape
    nb_temp = int(N_tm // 2)

    # distances[a, b]: column index of the largest overlap between a and b.
    # overlap_0[a]: overlap of template a with itself at column N_t.
    overlap_0 = numpy.zeros(nb_temp, dtype=numpy.float32)
    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.int32)

    for i in xrange(nb_temp - 1):
        data = c_overs[i].toarray()
        distances[i, i + 1:] = numpy.argmax(data[i + 1:, :], 1)
        distances[i + 1:, i] = distances[i, i + 1:]
        overlap_0[i] = data[i, N_t]

    # This rank's share of the templates, ordered by decreasing norm.
    sorted_temp = numpy.argsort(
        norm_templates[:nb_temp])[::-1][comm.rank::comm.size]
    M = numpy.zeros((2, 2), dtype=numpy.float32)
    V = numpy.zeros((2, 1), dtype=numpy.float32)

    # sorted_temp is already restricted to this rank, so every entry must be
    # visited.  Striding over it again with (comm.rank, comm.size) would skip
    # most templates as soon as more than one process is used.
    to_explore = xrange(len(sorted_temp))
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(to_explore)

    for position in to_explore:
        k = sorted_temp[position]
        electrodes = numpy.take(inv_nodes, edges[nodes[best_elec[k]]])
        overlap_k = c_overs[k]
        is_in_area = numpy.in1d(best_elec, electrodes)
        all_idx = numpy.arange(len(best_elec))[is_in_area]
        been_found = False
        t_k = None  # dense template k, built lazily

        for i in all_idx:
            if been_found:
                break

            t_i = None  # dense template i, built lazily
            overlap_i = c_overs[i]
            M[0, 0] = overlap_0[i]
            V[0, 0] = overlap_k[i, distances[k, i]]

            # NOTE(review): all_idx[i + 1:] slices by the template index i,
            # not by the position of i inside all_idx.  When all_idx is a
            # strict subset of the templates this skips some candidates
            # j > i.  Left unchanged here -- confirm the intent.
            for j in all_idx[i + 1:]:
                M[1, 1] = overlap_0[j]
                M[1, 0] = overlap_i[j, distances[k, i] - distances[k, j]]
                M[0, 1] = M[1, 0]
                V[1, 0] = overlap_k[j, distances[k, j]]
                try:
                    [a1, a2] = numpy.dot(scipy.linalg.inv(M), V)
                except (numpy.linalg.LinAlgError, ValueError):
                    # Singular or non-finite system: treat as "no fit".
                    [a1, a2] = [0, 0]

                a1_lim = limits[i]
                a2_lim = limits[j]
                is_a1 = (a1_lim[0] <= a1) and (a1 <= a1_lim[1])
                is_a2 = (a2_lim[0] <= a2) and (a2 <= a2_lim[1])
                if not (is_a1 and is_a2):
                    continue

                if t_k is None:
                    t_k = templates[:, k].toarray().ravel()
                if t_i is None:
                    t_i = templates[:, i].toarray().ravel()
                t_j = templates[:, j].toarray().ravel()

                new_template = a1 * t_i + a2 * t_j
                similarity = numpy.corrcoef(t_k, new_template)[0, 1]
                local_overlap = numpy.corrcoef(t_i, t_j)[0, 1]
                if similarity > cc_merge and local_overlap < cc_merge:
                    if k not in mixtures:
                        mixtures.append(k)
                    been_found = True
                    break

    sys.stderr.flush()

    to_remove = numpy.unique(numpy.array(mixtures, dtype=numpy.int32))
    to_remove = all_gather_array(to_remove, comm, 0, dtype='int32')

    if len(to_remove) > 0 and comm.rank == 0:
        result = load_data(params, 'clusters')
        slice_templates(params, to_remove)
        slice_clusters(params, result, to_remove=to_remove)

    comm.Barrier()

    del c_overs

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_remove)]
def delete_mixtures(comm, params, nb_cpu, nb_gpu, use_gpu):
    """Flag templates that look like the sum of two other templates and remove them.

    For every template ``k`` assigned to this MPI rank, pairs ``(i, j)`` of
    templates whose best electrode is listed in ``edges`` for ``k``'s best
    electrode are tested.  A 2x2 linear system built from the stored overlaps
    yields amplitudes ``(a1, a2)``.  ``k`` is flagged as a mixture when both
    amplitudes lie within ``limits[i]`` / ``limits[j]`` and
    ``a1 * t_i + a2 * t_j`` correlates with ``t_k`` above the
    ``clustering/cc_merge`` threshold.

    Flagged templates are gathered with ``all_gather_array``, then
    ``slice_templates`` / ``slice_clusters`` are called, and rank 0 deletes
    the temporary ``.overlap-mixtures.hdf5`` file.

    Parameters
    ----------
    comm : communicator object
        ``rank`` and ``size`` are read; it is also forwarded to the helpers.
    params : configuration object
        Read through ``get``, ``getint`` and ``getfloat``, and forwarded to
        the loading helpers.
    nb_cpu, nb_gpu, use_gpu :
        Forwarded unchanged to ``get_overlaps``.

    Returns
    -------
    list
        ``[nb_temp, len(to_remove)]`` where ``nb_temp`` is half the number of
        columns of the templates matrix.
    """
    # Loaded once; reloading the same dataset several times gains nothing.
    templates = load_data(params, 'templates')
    N_t = params.getint('data', 'N_t')
    cc_merge = params.getfloat('clustering', 'cc_merge')
    _, N_tm = templates.shape
    nb_temp = N_tm // 2
    mixtures = []

    overlap = get_overlaps(
        comm, params, extension='-mixtures', erase=True, normalize=False,
        maxoverlap=False, verbose=False, half=True,
        use_gpu=use_gpu, nb_cpu=nb_cpu, nb_gpu=nb_gpu
    )
    filename = params.get('data', 'file_out_suff') + '.overlap-mixtures.hdf5'

    norm_templates = load_data(params, 'norm-templates')
    result = load_data(params, 'clusters')
    best_elec = load_data(params, 'electrodes')
    limits = load_data(params, 'limits')
    N_total = params.getint('data', 'N_total')
    nodes, edges = get_nodes_and_edges(params)
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)

    # distances holds argmax positions that are later used as matrix indices,
    # so it has to be an integer array: float indices are rejected by current
    # NumPy / SciPy.
    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.int32)

    over_x = overlap.get('over_x')[:]
    over_y = overlap.get('over_y')[:]
    over_data = overlap.get('over_data')[:]
    over_shape = overlap.get('over_shape')[:]
    overlap.close()

    # From here on `overlap` is the in-memory sparse matrix; the row for the
    # pair (a, b) is addressed as a * nb_temp + b.
    overlap = scipy.sparse.csr_matrix(
        (over_data, (over_x, over_y)), shape=over_shape
    )

    for i in xrange(nb_temp - 1):
        rows = overlap[i * nb_temp + i + 1:(i + 1) * nb_temp].toarray()
        distances[i, i + 1:] = numpy.argmax(rows, 1)
        distances[i + 1:, i] = distances[i, i + 1:]

    all_temp = numpy.arange(comm.rank, nb_temp, comm.size)
    overlap_0 = overlap[:, N_t].toarray().reshape(nb_temp, nb_temp)

    if comm.rank == 0:
        pbar = get_progressbar(size=len(all_temp)).start()

    # This rank's share of the templates, ordered by decreasing norm.
    sorted_temp = numpy.argsort(
        norm_templates[:nb_temp])[::-1][comm.rank::comm.size]
    M = numpy.zeros((2, 2), dtype=numpy.float32)
    V = numpy.zeros((2, 1), dtype=numpy.float32)

    for count, k in enumerate(sorted_temp):
        electrodes = numpy.take(inv_nodes, edges[nodes[best_elec[k]]])
        overlap_k = overlap[k * nb_temp:(k + 1) * nb_temp].tolil()
        is_in_area = numpy.in1d(best_elec, electrodes)
        all_idx = numpy.arange(len(best_elec))[is_in_area]
        been_found = False
        t_k = None  # dense template k, built lazily

        for i in all_idx:
            if been_found:
                break

            t_i = None  # dense template i, built lazily
            overlap_i = overlap[i * nb_temp:(i + 1) * nb_temp].tolil()
            M[0, 0] = overlap_0[i, i]
            V[0, 0] = overlap_k[i, distances[k, i]]

            # NOTE(review): all_idx[i + 1:] slices by the template index i,
            # not by the position of i inside all_idx.  When all_idx is a
            # strict subset of the templates this skips some candidates
            # j > i.  Left unchanged here -- confirm the intent.
            for j in all_idx[i + 1:]:
                M[1, 1] = overlap_0[j, j]
                M[1, 0] = overlap_i[j, distances[k, i] - distances[k, j]]
                M[0, 1] = M[1, 0]
                V[1, 0] = overlap_k[j, distances[k, j]]
                try:
                    [a1, a2] = numpy.dot(scipy.linalg.inv(M), V)
                except (numpy.linalg.LinAlgError, ValueError):
                    # Singular or non-finite system: treat as "no fit".
                    [a1, a2] = [0, 0]

                a1_lim = limits[i]
                a2_lim = limits[j]
                is_a1 = (a1_lim[0] <= a1) and (a1 <= a1_lim[1])
                is_a2 = (a2_lim[0] <= a2) and (a2 <= a2_lim[1])
                if not (is_a1 and is_a2):
                    continue

                if t_k is None:
                    t_k = templates[:, k].toarray().ravel()
                if t_i is None:
                    t_i = templates[:, i].toarray().ravel()
                t_j = templates[:, j].toarray().ravel()

                new_template = a1 * t_i + a2 * t_j
                similarity = numpy.corrcoef(t_k, new_template)[0, 1]
                if similarity > cc_merge:
                    if k not in mixtures:
                        mixtures.append(k)
                    been_found = True
                    break

        if comm.rank == 0:
            pbar.update(count)

    if comm.rank == 0:
        pbar.finish()

    to_remove = numpy.unique(numpy.array(mixtures, dtype=numpy.int32))
    to_remove = all_gather_array(to_remove, comm, 0, dtype='int32')

    if len(to_remove) > 0:
        slice_templates(comm, params, to_remove)
        slice_clusters(comm, params, result, to_remove=to_remove)

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_remove)]