def merging_cc(params, nb_cpu, nb_gpu, use_gpu):
    """Merge pairs of templates whose normalized similarity exceeds ``cc_merge``.

    Builds the pairwise template-similarity matrix from the '-merging'
    overlaps (distributed across MPI ranks), then, on rank 0, greedily
    merges the most-similar pair until the best remaining similarity drops
    below the ``clustering.cc_merge`` threshold, updating the clustering
    result accordingly.

    Parameters
    ----------
    params : circus parameter object (read via getint/getfloat/getboolean/get)
    nb_cpu, nb_gpu : int
        Resources forwarded to ``get_overlaps`` / ``load_data_memshared``.
    use_gpu : bool
        Forwarded to the overlap computation.

    Returns
    -------
    list
        ``[nb_temp, n_merges]`` — the template count before merging and the
        number of merges performed.
    """

    def remove(result, distances, cc_merge):
        # Greedily merge the currently most-similar template pair until the
        # maximum of the distance (similarity) matrix falls below cc_merge.
        do_merge = True
        to_merge = numpy.zeros((0, 2), dtype=numpy.int32)
        # BUGFIX: must be a list — `range` objects have no .pop() in
        # Python 3, so the original `g_idx = range(...)` raised
        # AttributeError on the first merge.
        g_idx = list(range(len(distances)))

        while do_merge:
            dmax = distances.max()
            idx = numpy.where(distances == dmax)
            one_merge = [idx[0][0], idx[1][0]]
            do_merge = dmax >= cc_merge

            if do_merge:
                elec_ic1 = result['electrodes'][one_merge[0]]
                elec_ic2 = result['electrodes'][one_merge[1]]
                # Local index of each template among the templates of its
                # own electrode (offset from the electrode's first entry).
                nic1 = one_merge[0] - numpy.where(result['electrodes'] == elec_ic1)[0][0]
                nic2 = one_merge[1] - numpy.where(result['electrodes'] == elec_ic2)[0][0]
                mask1 = result['clusters_' + str(elec_ic1)] > -1
                mask2 = result['clusters_' + str(elec_ic2)] > -1
                tmp1 = numpy.unique(result['clusters_' + str(elec_ic1)][mask1])
                tmp2 = numpy.unique(result['clusters_' + str(elec_ic2)][mask2])
                elements1 = numpy.where(result['clusters_' + str(elec_ic1)] == tmp1[nic1])[0]
                elements2 = numpy.where(result['clusters_' + str(elec_ic2)] == tmp2[nic2])[0]

                # Keep the template backed by more spikes; delete the other
                # one's data/cluster/time/peak entries from its electrode.
                if len(elements1) > len(elements2):
                    to_remove = one_merge[1]
                    to_keep = one_merge[0]
                    elec = elec_ic2
                    elements = elements2
                else:
                    to_remove = one_merge[0]
                    to_keep = one_merge[1]
                    elec = elec_ic1
                    elements = elements1

                result['data_' + str(elec)] = numpy.delete(result['data_' + str(elec)], elements, axis=0)
                result['clusters_' + str(elec)] = numpy.delete(result['clusters_' + str(elec)], elements)
                result['times_' + str(elec)] = numpy.delete(result['times_' + str(elec)], elements)
                result['peaks_' + str(elec)] = numpy.delete(result['peaks_' + str(elec)], elements)
                result['electrodes'] = numpy.delete(result['electrodes'], to_remove)
                distances = numpy.delete(distances, to_remove, axis=0)
                distances = numpy.delete(distances, to_remove, axis=1)
                # Record the merge in global (pre-merge) template indices,
                # then drop the removed template from the index map.
                to_merge = numpy.vstack((to_merge, numpy.array([g_idx[to_keep], g_idx[to_remove]])))
                g_idx.pop(to_remove)

        return to_merge, result

    data_file = params.data_file
    N_e = params.getint('data', 'N_e')
    N_total = params.nb_channels
    N_t = params.getint('detection', 'N_t')
    template_shift = params.getint('detection', 'template_shift')
    blosc_compress = params.getboolean('data', 'blosc_compress')

    N_tm = load_data(params, 'nb_templates')
    nb_temp = int(N_tm // 2)
    to_merge = []
    cc_merge = params.getfloat('clustering', 'cc_merge')
    norm = N_e * N_t
    result = []

    # Compute (or recompute) the half, normalized overlaps used for merging.
    overlap = get_overlaps(
        params, extension='-merging', erase=True, normalize=True, maxoverlap=False,
        verbose=False, half=True, use_gpu=use_gpu, nb_cpu=nb_cpu, nb_gpu=nb_gpu)
    overlap.close()
    filename = params.get('data', 'file_out_suff') + '.overlap-merging.hdf5'

    SHARED_MEMORY = get_shared_memory_flag(params)

    if not SHARED_MEMORY:
        over_x, over_y, over_data, over_shape = load_data(
            params, 'overlaps-raw', extension='-merging')
    else:
        over_x, over_y, over_data, over_shape = load_data_memshared(
            params, 'overlaps-raw', extension='-merging', use_gpu=use_gpu,
            nb_cpu=nb_cpu, nb_gpu=nb_gpu)

    #sub_comm, is_local = get_local_ring(True)
    #if is_local:
    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.float32)
    # Rows are distributed round-robin across MPI ranks.
    to_explore = numpy.arange(nb_temp - 1)[comm.rank::comm.size]

    for i in to_explore:
        # Select the flattened overlap entries for pairs (i, j) with j > i.
        idx = numpy.where((over_x >= i * nb_temp + i + 1) & (over_x < ((i + 1) * nb_temp)))[0]
        local_x = over_x[idx] - (i * nb_temp + i + 1)
        data = numpy.zeros((nb_temp - (i + 1), over_shape[1]), dtype=numpy.float32)
        data[local_x, over_y[idx]] = over_data[idx]
        # Similarity = peak overlap across lags, normalized by N_e * N_t.
        distances[i, i + 1:] = numpy.max(data, 1) / norm
        distances[i + 1:, i] = distances[i, i + 1:]

    # Now we need to sync everything across nodes: each rank filled disjoint
    # rows, so summing the gathered copies reconstructs the full matrix.
    distances = gather_array(distances, comm, 0, 1, 'float32', compress=blosc_compress)
    if comm.rank == 0:
        distances = distances.reshape(comm.size, nb_temp, nb_temp)
        distances = numpy.sum(distances, 0)

    #sub_comm.Barrier()
    #sub_comm.Free()

    if comm.rank == 0:
        result = load_data(params, 'clusters')
        to_merge, result = remove(result, distances, cc_merge)

    to_merge = numpy.array(to_merge)
    to_merge = comm.bcast(to_merge, root=0)

    if len(to_merge) > 0:
        slice_templates(params, to_merge=to_merge)
        slice_clusters(params, result)

    comm.Barrier()
    del result, over_x, over_y, over_data

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_merge)]
def apply_patch_for_similarities(params, extension):
    """Recompute and store ``maxoverlap``/``maxlag`` for legacy result files.

    For results produced by versions 0.5.XX (detected via
    ``test_patch_for_similarities``), rebuilds the per-pair maximum overlap
    and the lag at which it occurs from the raw overlaps, distributing rows
    across MPI ranks, and rewrites the ``maxoverlap``, ``maxlag`` and
    ``version`` datasets in the templates HDF5 file on rank 0.

    Parameters
    ----------
    params : circus parameter object (read via getint/getboolean/get)
    extension : str
        Suffix identifying the templates/overlaps files to patch.
    """
    if not test_patch_for_similarities(params, extension):
        import circus.shared.files as io
        file_out_suff = params.get('data', 'file_out_suff')
        hdf5_compress = params.getboolean('data', 'hdf5_compress')
        blosc_compress = params.getboolean('data', 'blosc_compress')
        N_tm = io.load_data(params, 'nb_templates', extension)
        N_half = int(N_tm // 2)
        N_t = params.getint('detection', 'N_t')
        # Number of possible lags between two templates of length N_t.
        duration = 2 * N_t - 1

        if comm.rank == 0:
            print_and_log(["Fixing overlaps from 0.5.XX..."], 'default', logger)

        maxlag = numpy.zeros((N_half, N_half), dtype=numpy.int32)
        maxoverlap = numpy.zeros((N_half, N_half), dtype=numpy.float32)

        # Rows are distributed round-robin across MPI ranks.
        to_explore = numpy.arange(N_half - 1)[comm.rank::comm.size]
        if comm.rank == 0:
            to_explore = get_tqdm_progressbar(params, to_explore)

        # BUGFIX: SHARED_MEMORY was read here without ever being assigned in
        # this function (NameError); obtain it as merging_cc does.
        SHARED_MEMORY = get_shared_memory_flag(params)

        if not SHARED_MEMORY:
            over_x, over_y, over_data, over_shape = io.load_data(
                params, 'overlaps-raw', extension=extension)
        else:
            over_x, over_y, over_data, over_shape = io.load_data_memshared(
                params, 'overlaps-raw', extension=extension)

        for i in to_explore:
            # Select flattened overlap entries for pairs (i, j), i < j < N_half.
            idx = numpy.where((over_x >= i * N_tm + i + 1) & (over_x < (i * N_tm + N_half)))[0]
            local_x = over_x[idx] - (i * N_tm + i + 1)
            data = numpy.zeros((N_half - (i + 1), duration), dtype=numpy.float32)
            data[local_x, over_y[idx]] = over_data[idx]
            # Lag of the peak overlap, antisymmetric between (i, j) and (j, i).
            maxlag[i, i + 1:] = N_t - numpy.argmax(data, 1)
            maxlag[i + 1:, i] = -maxlag[i, i + 1:]
            maxoverlap[i, i + 1:] = numpy.max(data, 1)
            maxoverlap[i + 1:, i] = maxoverlap[i, i + 1:]

        # Now we need to sync everything across nodes: ranks filled disjoint
        # rows, so summing the gathered copies yields the full matrices.
        maxlag = gather_array(maxlag, comm, 0, 1, 'int32', compress=blosc_compress)
        if comm.rank == 0:
            maxlag = maxlag.reshape(comm.size, N_half, N_half)
            maxlag = numpy.sum(maxlag, 0)

        maxoverlap = gather_array(maxoverlap, comm, 0, 1, 'float32', compress=blosc_compress)
        if comm.rank == 0:
            maxoverlap = maxoverlap.reshape(comm.size, N_half, N_half)
            maxoverlap = numpy.sum(maxoverlap, 0)

        if comm.rank == 0:
            myfile2 = h5py.File(file_out_suff + '.templates%s.hdf5' % extension, 'r+', libver='earliest')
            # Replace any stale datasets before writing the patched ones.
            for key in ['maxoverlap', 'maxlag', 'version']:
                if key in myfile2.keys():
                    myfile2.pop(key)
            myfile2.create_dataset('version', data=numpy.array(
                circus.__version__.split('.'), dtype=numpy.int32))
            if hdf5_compress:
                myfile2.create_dataset('maxlag', data=maxlag, compression='gzip')
                myfile2.create_dataset('maxoverlap', data=maxoverlap, compression='gzip')
            else:
                myfile2.create_dataset('maxlag', data=maxlag)
                myfile2.create_dataset('maxoverlap', data=maxoverlap)
            myfile2.close()