class TestConverting(unittest.TestCase):
    def setUp(self):
        self.all_spikes = None
        self.max_chunk = '100'
        dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name = os.path.join(self.path, 'fitting.dat')
        self.source_dataset = get_dataset(self)
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False',
                       self.file_name, 'fitting', 1)
            mpi_launch('whitening', self.file_name, 2, 0, 'False')
            self.parser = CircusParser(self.file_name)
            self.parser.write('fitting', 'max_chunk', '10')
            mpi_launch('fitting', self.file_name, 2, 0, 'False')
        else:
            self.parser = CircusParser(self.file_name)

    def test_converting_some(self):
        self.parser.write('converting', 'export_pcs', 'some')
        mpi_launch('converting', self.file_name, 1, 0, 'False')
        self.parser.write('converting', 'export_pcs', 'prompt')

    def test_converting_all(self):
        self.parser.write('converting', 'export_pcs', 'all')
        mpi_launch('converting', self.file_name, 2, 0, 'False')
        self.parser.write('converting', 'export_pcs', 'prompt')
class TestSmartSearch(unittest.TestCase):

    def setUp(self):
        self.all_matches    = None
        self.all_templates  = None
        dirname             = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path           = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name      = os.path.join(self.path, 'smart_search.dat')
        self.source_dataset = get_dataset(self)
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False', self.file_name, 'smart-search', 1)
            mpi_launch('whitening', self.file_name, 2, 0, 'False')
        self.parser = CircusParser(self.file_name)
        self.parser.write('clustering', 'max_elts', '2000')

    #def tearDown(self):
    #    data_path = '.'.join(self.file_name.split('.')[:-1])
    #    shutil.rmtree(data_path)

    def test_smart_search_on(self):
        self.parser.write('clustering', 'smart_search', 'True')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'smart_search', 'False')
        res = get_performance(self.file_name, 'smart_search_on')

    def test_smart_search_off(self):
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        res = get_performance(self.file_name, 'smart_search_off')
class TestWhitening(unittest.TestCase):
    def setUp(self):
        dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name = os.path.join(self.path, 'whitening.dat')
        self.source_dataset = get_dataset(self)
        self.whitening = None
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False',
                       self.file_name, 'fitting', 1)
        self.params = CircusParser(self.file_name)
        self.params.write('clustering', 'max_elts', '1000')
        self.params.write('whitening', 'spatial', 'True')
        self.params.write('clustering', 'temporal', 'False')

    def test_whitening_one_CPU(self):
        mpi_launch('whitening', self.file_name, 1, 0, 'False')
        res = get_performance(self.file_name, 'one_CPU')
        if self.whitening is None:
            self.whitening = res
        assert ((res['spatial'] - self.whitening['spatial'])**2).mean() < 0.1

    def test_whitening_two_CPU(self):
        mpi_launch('whitening', self.file_name, 2, 0, 'False')
        res = get_performance(self.file_name, 'two_CPU')
        if self.whitening is None:
            self.whitening = res
        assert ((res['spatial'] - self.whitening['spatial'])**2).mean() < 0.1

    def test_whitening_safety_time(self):
        self.params.write('clustering', 'safety_time', '5')
        mpi_launch('whitening', self.file_name, 1, 0, 'False')
        self.params.write('clustering', 'safety_time', 'auto')
        res = get_performance(self.file_name, 'safety_time')
        if self.whitening is None:
            self.whitening = res
        assert ((res['spatial'] - self.whitening['spatial'])**2).mean() < 0.1
class TestGarbage(unittest.TestCase):
    def setUp(self):
        self.all_spikes = None
        self.max_chunk = '100'
        dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name = os.path.join(self.path, 'fitting.dat')
        self.source_dataset = get_dataset(self)
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False',
                       self.file_name, 'fitting')
            mpi_launch('whitening', self.file_name, 2, 0, 'False')
        self.parser = CircusParser(self.file_name)

    def test_collect_all(self):
        self.parser.write('fitting', 'max_chunk', self.max_chunk)
        self.parser.write('fitting', 'collect_all', 'True')
        mpi_launch('fitting', self.file_name, 1, 0, 'False')
        self.parser.write('fitting', 'max_chunk', 'inf')
        self.parser.write('fitting', 'collect_all', 'False')
        ctruth, cspikes, cgarbage = get_performance(self.file_name)
        assert cgarbage < cspikes
def main(params, nb_cpu, nb_gpu, use_gpu, file_name, benchmark, sim_same_elec):
    """
    Useful tool to create synthetic datasets for benchmarking.
    
    Arguments
    ---------
    benchmark : {'fitting', 'clustering', 'synchrony', 'pca-validation', 'smart-search', 'drifts'}
        
    """
    if sim_same_elec is None:
        sim_same_elec = 0.8

    logger         = init_logging(params.logfile)
    logger         = logging.getLogger('circus.benchmarking')

    numpy.random.seed(265)
    file_name      = os.path.abspath(file_name)
    data_path      = os.path.dirname(file_name)
    data_suff, ext = os.path.splitext(os.path.basename(file_name))
    file_out, ext  = os.path.splitext(file_name)

    if ext == '':
        ext = '.dat'
        file_name += ext
    
    if ext != '.dat':
        if comm.rank == 0:
            print_and_log(['Benchmarking produces raw files: select a .dat extension'], 'error', logger)
        sys.exit(0)

    if benchmark not in ['fitting', 'clustering', 'synchrony', 'smart-search', 'drifts']:
        if comm.rank == 0:
            print_and_log(['Benchmark must be one of [fitting, clustering, synchrony, smart-search, drifts]'], 'error', logger)
        sys.exit(0)

    # The extension `.p` or `.pkl` or `.pickle` seems more appropriate than `.pic`.
    # see: http://stackoverflow.com/questions/4530111/python-saving-objects-and-using-pickle-extension-of-filename
    # see: https://wiki.python.org/moin/UsingPickle
    def write_benchmark(filename, benchmark, cells, rates, amplitudes, sampling, probe, trends=None):
        """Save benchmark parameters in a file to remember them."""
        import cPickle
        to_write = {'benchmark' : benchmark}
        to_write['cells']      = cells
        to_write['rates']      = rates
        to_write['probe']      = probe
        to_write['amplitudes'] = amplitudes
        to_write['sampling']   = sampling
        if benchmark == 'drifts':
            to_write['drifts'] = trends
        with open(filename + '.pic', 'wb') as pickle_file:
            cPickle.dump(to_write, pickle_file)

    # Retrieve some key parameters.
    templates = io.load_data(params, 'templates')
    N_tm = templates.shape[1] // 2
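    # The template matrix stores a central and an orthogonal component for each
    # cell (first and second half of the columns respectively), hence the
    # division by 2.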
    trends          = None

    # Normalize some variables.
    if benchmark == 'fitting':
        nb_insert       = 25
        n_cells         = numpy.random.random_integers(0, N_tm - 1, nb_insert)
        rate            = nb_insert * [10]
        amplitude       = numpy.linspace(0.5, 5, nb_insert)
    if benchmark == 'clustering':
        n_point         = 5
        n_cells         = numpy.random.random_integers(0, N_tm - 1, n_point ** 2)
        x, y            = numpy.mgrid[0:n_point, 0:n_point]
        rate            = numpy.linspace(0.5, 20, n_point)[x.flatten()]
        amplitude       = numpy.linspace(0.5, 5, n_point)[y.flatten()]
    if benchmark == 'synchrony':
        nb_insert       = 5
        corrcoef        = 0.2
        n_cells         = nb_insert * [numpy.random.random_integers(0, N_tm - 1, 1)[0]]
        rate            = 10. / corrcoef
        amplitude       = 2
    if benchmark == 'pca-validation':
        nb_insert       = 10
        n_cells         = numpy.random.random_integers(0, N_tm - 1, nb_insert)
        rate_min        = 0.5
        rate_max        = 20.0
        rate            = rate_min + (rate_max - rate_min) * numpy.random.random_sample(nb_insert)
        amplitude_min   = 0.5
        amplitude_max   = 5.0
        amplitude       = amplitude_min + (amplitude_max - amplitude_min) * numpy.random.random_sample(nb_insert)
    if benchmark == 'smart-search':
        nb_insert       = 10
        n_cells         = nb_insert*[numpy.random.random_integers(0, templates.shape[1]//2-1, 1)[0]]
        rate            = 1 + 5*numpy.arange(nb_insert)
        amplitude       = 2
    if benchmark == 'drifts':
        n_point         = 5
        n_cells         = numpy.random.random_integers(0, templates.shape[1]//2-1, n_point**2)
        x, y            = numpy.mgrid[0:n_point,0:n_point]
        rate            = 5*numpy.ones(n_point)[x.flatten()]
        amplitude       = numpy.linspace(0.5, 5, n_point)[y.flatten()]
        trends          = numpy.random.randn(n_point**2)

    # Delete the output directory tree if this output directory exists.
    if comm.rank == 0:
        if os.path.exists(file_out):
            shutil.rmtree(file_out)

    # Check and normalize some variables.
    if n_cells is None:
        n_cells    = 1
        cells      = [numpy.random.permutation(numpy.arange(n_cells))[0]]
    elif not numpy.iterable(n_cells):
        cells      = [n_cells]
        n_cells    = 1
    else:
        cells      = n_cells
        n_cells    = len(cells)

    if numpy.iterable(rate):
        assert len(rate) == len(cells), "Should have the same number of rates and cells"
    else:
        rate = [rate] * len(cells)

    if numpy.iterable(amplitude):
        assert len(amplitude) == len(cells), "Should have the same number of amplitudes and cells"
    else:
        amplitude = [amplitude] * len(cells)

    # Retrieve some additional key parameters.
    #params           = detect_memory(params)
    data_file        = params.get_data_file(source=True)
    N_e              = params.getint('data', 'N_e')
    N_total          = params.nb_channels
    hdf5_compress    = params.getboolean('data', 'hdf5_compress')
    nodes, edges     = get_nodes_and_edges(params)
    N_t              = params.getint('detection', 'N_t')
    inv_nodes        = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)
    do_temporal_whitening = params.getboolean('whitening', 'temporal')
    do_spatial_whitening  = params.getboolean('whitening', 'spatial')
    N_tm_init             = templates.shape[1]//2
    thresholds            = io.load_data(params, 'thresholds')
    limits                = io.load_data(params, 'limits')
    best_elecs            = io.load_data(params, 'electrodes')
    norms                 = io.load_data(params, 'norm-templates')

    # Create output directory if it does not exist.
    if comm.rank == 0:
        if not os.path.exists(file_out):
            os.makedirs(file_out)

    # Save benchmark parameters in a file to remember them.
    if comm.rank == 0:
        write_benchmark(file_out, benchmark, cells, rate, amplitude,
                        params.rate, params.get('data', 'mapping'), trends)

    # Synchronize all the threads/processes.
    comm.Barrier()

    if do_spatial_whitening:
        spatial_whitening  = io.load_data(params, 'spatial_whitening')
    if do_temporal_whitening:
        temporal_whitening = io.load_data(params, 'temporal_whitening')

    # Retrieve some additional key parameters.
    chunk_size     = params.getint('data', 'chunk_size')
    scalings       = []
    
    params.set('data', 'data_file', file_name)

    data_file_out = params.get_data_file(is_empty=True)
    data_file_out.allocate(shape=data_file.shape)

    # Synchronize all the threads/processes.
    comm.Barrier()

    # For each synthesized cell to insert, add a generated template to the set of
    # existing templates.
    for gcount, cell_id in enumerate(cells):
        best_elec   = best_elecs[cell_id]
        indices     = inv_nodes[edges[nodes[best_elec]]]
        count       = 0
        new_indices = []
        all_elecs   = numpy.random.permutation(numpy.arange(N_e))
        reference   = templates[:, cell_id].toarray().reshape(N_e, N_t)
        # Initialize the similarity (i.e. default value).
        similarity = 1.0
        # Find the first eligible template for the wanted synthesized cell.
        while len(new_indices) != len(indices) or (similarity > sim_same_elec): 
            similarity  = 0
            if count == len(all_elecs):
                if comm.rank == 0:
                    print_and_log(["No electrode to move template %d (max similarity is %g)" %(cell_id, similarity)], 'error', logger)
                sys.exit(0)
            else:
                # Get the next shuffled electrode.
                n_elec = all_elecs[count]

                if benchmark not in ['synchrony', 'smart-search']:
                    # Process if the shuffled electrode and the nearest electrode
                    # to the synthesized cell are not identical.
                    local_test = n_elec != best_elec
                else:
                    # Process if the shuffled electrode and the nearest electrode
                    # to the synthesized cell are identical.
                    local_test = n_elec == best_elec

                if local_test:
                    # Shuffle the neighboring electrodes without modifying
                    # the nearest electrode to the synthesized cell.
                    new_indices = inv_nodes[edges[nodes[n_elec]]]
                    idx = numpy.where(new_indices != best_elec)[0]
                    new_indices[idx] = numpy.random.permutation(new_indices[idx])

                    if len(new_indices) == len(indices):
                        # Shuffle the templates on the neighboring electrodes.
                        new_temp = numpy.zeros(reference.shape,
                                               dtype=numpy.float32)
                        new_temp[new_indices, :] = reference[indices, :]
                        # Compute the scaling factor which normalizes the
                        # shuffled template.
                        gmin = new_temp.min()
                        data = numpy.where(new_temp == gmin)
                        scaling = -thresholds[data[0][0]]/gmin
                        for i in xrange(templates.shape[1]//2):
                            match = templates[:, i].toarray().reshape(N_e, N_t)
                            d = numpy.corrcoef(match.flatten(),
                                               scaling * new_temp.flatten())[0, 1]
                            if d > similarity:
                                similarity = d
                else:
                    new_indices = []
            # Go to the next shuffled electrode.
            count += 1

        #if comm.rank == 0:
        #    print "Template", cell_id, "is shuffled from electrode", best_elec, "to", n_elec, "(max similarity is %g)" %similarity


        ## Insert the selected template.
        
        # Retrieve the number of existing templates in the dataset.
        N_tm           = templates.shape[1]//2

        # Generate the template of the synthesized cell from the selected
        # template, the target amplitude and the rescaling (i.e. threshold of
        # the target electrode).
        to_insert = numpy.zeros(reference.shape, dtype=numpy.float32)
        to_insert[new_indices] = scaling * amplitude[gcount] * templates[:, cell_id].toarray().reshape(N_e, N_t)[indices]
        to_insert = to_insert.flatten()
        to_insert2 = numpy.zeros(reference.shape, dtype=numpy.float32)
        to_insert2[new_indices] = scaling * amplitude[gcount] * templates[:, cell_id + N_tm].toarray().reshape(N_e, N_t)[indices]
        to_insert2 = to_insert2.flatten()

        # Compute the norm of the generated template.
        mynorm     = numpy.sqrt(numpy.sum(to_insert ** 2) / (N_e * N_t))
        mynorm2    = numpy.sqrt(numpy.sum(to_insert2 ** 2) / (N_e * N_t))

        # Insert the limits of the generated template.
        limits     = numpy.vstack((limits, limits[cell_id]))
        # Insert the best electrode of the generated template.
        best_elecs = numpy.concatenate((best_elecs, [n_elec]))

        # Insert the norm of the generated template (i.e. central component and
        # orthogonal component).
        norms      = numpy.insert(norms, N_tm, mynorm)
        norms      = numpy.insert(norms, 2 * N_tm + 1, mynorm2)
        # Insert the scaling of the generated template.
        scalings  += [scaling]

        # Retrieve the data about the existing templates.
        templates = templates.tocoo()
        xdata     = templates.row
        ydata     = templates.col
        zdata     = templates.data

        # Shift by one the orthogonal components of the existing templates.
        idx       = numpy.where(ydata >= N_tm)[0]
        ydata[idx] += 1
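        # Assumed column layout: columns [0, N_tm) hold the central components and
        # columns [N_tm, 2 * N_tm) the orthogonal ones, so after this shift the new
        # cell occupies columns N_tm and 2 * N_tm + 1 of the enlarged matrix.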

        # Insert the central component of the selected template.
        dx    = to_insert.nonzero()[0].astype(numpy.int32)
        xdata = numpy.concatenate((xdata, dx))
        ydata = numpy.concatenate((ydata, N_tm * numpy.ones(len(dx), dtype=numpy.int32)))
        zdata = numpy.concatenate((zdata, to_insert[dx]))

        # Insert the orthogonal component of the selected template.
        dx    = to_insert2.nonzero()[0].astype(numpy.int32)
        xdata = numpy.concatenate((xdata, dx))
        ydata = numpy.concatenate((ydata, (2 * N_tm + 1) * numpy.ones(len(dx), dtype=numpy.int32)))
        zdata = numpy.concatenate((zdata, to_insert2[dx]))

        # Reconstruct the matrix of templates.
        templates = scipy.sparse.csc_matrix((zdata, (xdata, ydata)), shape=(N_e * N_t, 2 * (N_tm + 1)))

    # Remove all the expired data.
    if benchmark == 'pca-validation':
        # Keep only the data related to the inserted cells.
        N_tm_init = 0
        N_tm = templates.shape[1] // 2

        limits = limits[N_tm - nb_insert:, :]
        best_elecs = best_elecs[N_tm - nb_insert:]
        norms = numpy.concatenate((norms[N_tm-nb_insert:N_tm], norms[2*N_tm-nb_insert:2*N_tm]))
        scalings = scalings
        
        templates = templates.tocoo()
        xdata = templates.row
        ydata = templates.col
        zdata = templates.data
        
        idx_cen = numpy.logical_and(N_tm - nb_insert <= ydata, ydata < N_tm)
        idx_cen = numpy.where(idx_cen)[0]
        idx_ort = numpy.logical_and(2 * N_tm - nb_insert <= ydata, ydata < 2 * N_tm)
        idx_ort = numpy.where(idx_ort)[0]
        ydata[idx_cen] = ydata[idx_cen] - (N_tm - nb_insert)
        ydata[idx_ort] = ydata[idx_ort] - 2 * (N_tm - nb_insert)
        idx = numpy.concatenate((idx_cen, idx_ort))
        xdata = xdata[idx]
        ydata = ydata[idx]
        zdata = zdata[idx]
        templates = scipy.sparse.csc_matrix((zdata, (xdata, ydata)), shape=(N_e * N_t, 2 * nb_insert))
        
    # Retrieve the information about the organisation of the chunks of data.
    nb_chunks, last_chunk_len = data_file.analyze(chunk_size)

    # Display information about the generated benchmark.
    if comm.rank == 0:
        print_and_log(["Generating benchmark data [%s] with %d cells" %(benchmark, n_cells)], 'info', logger)
        purge(file_out, '.data')


    template_shift = params.getint('detection', 'template_shift')
    all_chunks     = numpy.arange(nb_chunks)
    to_process     = all_chunks[numpy.arange(comm.rank, nb_chunks, comm.size)]
    loc_nb_chunks  = len(to_process)
    numpy.random.seed(comm.rank)

    to_explore = xrange(comm.rank, nb_chunks, comm.size)

    # Initialize the progress bar about the generation of the benchmark.
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(to_explore)

    # Open the file for collective I/O.
    #g = myfile.Open(comm, file_name, MPI.MODE_RDWR)
    #g.Set_view(data_offset, data_mpi, data_mpi)
    data_file_out.open(mode='r+')

    # Open the thread/process' files to collect the results.
    spiketimes_filename = os.path.join(file_out, data_suff + '.spiketimes-%d.data' %comm.rank)
    spiketimes_file = open(spiketimes_filename, 'wb')
    amplitude_filename = os.path.join(file_out, data_suff + '.amplitudes-%d.data' %comm.rank)
    amplitudes_file = open(amplitude_filename, 'wb')
    templates_filename = os.path.join(file_out, data_suff + '.templates-%d.data' %comm.rank)
    templates_file = open(templates_filename, 'wb')
    real_amps_filename = os.path.join(file_out, data_suff + '.real_amps-%d.data' %comm.rank)
    real_amps_file = open(real_amps_filename, 'wb')
    voltages_filename = os.path.join(file_out, data_suff + '.voltages-%d.data' %comm.rank)
    voltages_file = open(voltages_filename, 'wb')

    # For each chunk of data assigned to the current thread/process, generate
    # the new chunk of data (i.e. including the added synthesized cells).
    for count, gidx in enumerate(to_explore):

        #if (last_chunk_len > 0) and (gidx == (nb_chunks - 1)):
        #    chunk_len  = last_chunk_len
        #    chunk_size = last_chunk_len // N_total

        result         = {'spiketimes' : [], 'amplitudes' : [], 
                          'templates' : [], 'real_amps' : [],
                          'voltages' : []}
        offset         = gidx * chunk_size
        local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)

        if benchmark == 'pca-validation':
            # Clear the current data chunk.
            local_chunk = numpy.zeros(local_chunk.shape, dtype=local_chunk.dtype)

        # Handle whitening if necessary.
        if do_spatial_whitening:
            local_chunk = numpy.dot(local_chunk, spatial_whitening)
        if do_temporal_whitening:
            local_chunk = scipy.ndimage.filters.convolve1d(local_chunk,
                                                           temporal_whitening,
                                                           axis=0,
                                                           mode='constant')

        if benchmark == 'synchrony':
            # Generate some spike indices (i.e. times) at the given rate for
            # 'synchrony' mode. Each synthesized cell will use a subset of these
            # spike times.
            mips = numpy.random.rand(chunk_size) < rate[0] / float(params.rate)

        # For each synthesized cell, generate its spike indices (i.e. times) and
        # add them to the dataset.
        for idx in xrange(len(cells)):
            if benchmark == 'synchrony':
                # Choose a subset of the spike indices generated before. The
                # size of this subset is parameterized by the target correlation
                # coefficient.
                sidx       = numpy.where(mips == True)[0]
                spikes     = numpy.zeros(chunk_size, dtype=numpy.bool)
                spikes[sidx[numpy.random.rand(len(sidx)) < corrcoef]] = True
            else:
                # Generate some spike indices at the given rate.
                spikes     = numpy.random.rand(chunk_size) < rate[idx] / float(params.rate)
            if benchmark == 'drifts':
                amplitudes = numpy.ones(len(spikes)) + trends[idx]*((spikes + offset)/(5*60*float(params.rate)))
            else:
                amplitudes = numpy.ones(len(spikes))
            # Padding with `False` to avoid the insertion of partial spikes at
            # the edges of the signal.
            spikes[:N_t]   = False
            spikes[-N_t:]  = False
            # Find the indices of the spike samples.
            spikes         = numpy.where(spikes == True)[0]
            n_template     = N_tm_init + idx
            loc_template   = templates[:, n_template].toarray().reshape(N_e, N_t)
            first_flat     = loc_template.T.flatten()
            norm_flat      = numpy.sum(first_flat ** 2)
            # For each index (i.e. spike sample location) add the spike to the
            # chunk of data.
            refractory     = int(5 * 1e-3 * params.rate)         
            t_last         = - refractory
            for scount, spike in enumerate(spikes):
                if (spike - t_last) > refractory:
                    local_chunk[spike-template_shift:spike+template_shift+1, :] += amplitudes[scount]*loc_template.T
                    amp        = numpy.dot(local_chunk[spike-template_shift:spike+template_shift+1, :].flatten(), first_flat)
                    amp       /= norm_flat
                    result['real_amps']  += [amp]
                    result['spiketimes'] += [spike + offset]
                    result['amplitudes'] += [(amplitudes[scount], 0)]
                    result['templates']  += [n_template]
                    result['voltages']   += [local_chunk[spike, best_elecs[idx]]]
                    t_last                = spike

        # Write the results into the thread/process' files.
        spikes_to_write     = numpy.array(result['spiketimes'], dtype=numpy.uint32)
        amplitudes_to_write = numpy.array(result['amplitudes'], dtype=numpy.float32)
        templates_to_write  = numpy.array(result['templates'], dtype=numpy.int32)
        real_amps_to_write  = numpy.array(result['real_amps'], dtype=numpy.float32)
        voltages_to_write   = numpy.array(result['voltages'], dtype=numpy.float32)

        spiketimes_file.write(spikes_to_write.tostring())   
        amplitudes_file.write(amplitudes_to_write.tostring())
        templates_file.write(templates_to_write.tostring())
        real_amps_file.write(real_amps_to_write.tostring())
        voltages_file.write(voltages_to_write.tostring())

        #print count, 'spikes inserted...'
        #new_chunk    = numpy.zeros((chunk_size, N_total), dtype=numpy.float32)
        #new_chunk[:, nodes] = local_chunk

        # Overwrite the new chunk of data using explicit offset. 
        #new_chunk   = new_chunk.flatten()
        #g.Write_at(gidx * chunk_len, new_chunk)
        data_file_out.set_data(offset, local_chunk)

        # Update the progress bar about the generation of the benchmark.
        
    # Close the thread/process' files.
    spiketimes_file.flush()
    os.fsync(spiketimes_file.fileno())
    spiketimes_file.close()

    amplitudes_file.flush()
    os.fsync(amplitudes_file.fileno())
    amplitudes_file.close()

    templates_file.flush()
    os.fsync(templates_file.fileno())
    templates_file.close()

    real_amps_file.flush()
    os.fsync(real_amps_file.fileno())
    real_amps_file.close()

    voltages_file.flush()
    os.fsync(voltages_file.fileno())
    voltages_file.close()


    # Close the file for collective I/O.
    data_file_out.close()
    data_file.close()

    
    # Synchronize all the threads/processes.
    comm.Barrier()

    
    ## Finally, perform all the administrative tasks
    ## (i.e. file and folder management).

    file_params = file_out + '.params'

    if comm.rank == 0:
        # Create `injected` directory if it does not exist
        result_path = os.path.join(file_out, 'injected') 
        if not os.path.exists(result_path):
            os.makedirs(result_path)

        # Copy initial configuration file from `<dataset1>.params` to `<dataset2>.params`.
        shutil.copy2(params.get('data', 'data_file_noext') + '.params', file_params)
        new_params = CircusParser(file_name)
        # Copy initial basis file from `<dataset1>/<dataset1>.basis.hdf5` to
        # `<dataset2>/injected/<dataset2>.basis.hdf5`.
        shutil.copy2(params.get('data', 'file_out') + '.basis.hdf5',
                     os.path.join(result_path, data_suff + '.basis.hdf5'))


        # Save templates into `<dataset>/<dataset>.templates.hdf5`.
        mydata = h5py.File(os.path.join(file_out, data_suff + '.templates.hdf5'), 'w')
        templates = templates.tocoo()
        if hdf5_compress:
            mydata.create_dataset('temp_x', data=templates.row, compression='gzip')
            mydata.create_dataset('temp_y', data=templates.col, compression='gzip')
            mydata.create_dataset('temp_data', data=templates.data, compression='gzip')
        else:
            mydata.create_dataset('temp_x', data=templates.row)
            mydata.create_dataset('temp_y', data=templates.col)
            mydata.create_dataset('temp_data', data=templates.data)
        mydata.create_dataset('temp_shape', data=numpy.array([N_e, N_t, templates.shape[1]],
                                                             dtype=numpy.int32))
        mydata.create_dataset('limits', data=limits)
        mydata.create_dataset('norms', data=norms)
        mydata.close()

        # Save electrodes into `<dataset>/<dataset>.clusters.hdf5`.
        mydata = h5py.File(os.path.join(file_out, data_suff + '.clusters.hdf5'), 'w')
        mydata.create_dataset('electrodes', data=best_elecs)
        mydata.close()

    comm.Barrier()
    if comm.rank == 0:
        # Gather data from all threads/processes.
        f_next, extension = os.path.splitext(file_name)
        file_out_bis = os.path.join(f_next, os.path.basename(f_next))
        #new_params.set('data', 'file_out', file_out_bis) # Output file without suffix
        #new_params.set('data', 'file_out_suff', file_out_bis  + params.get('data', 'suffix'))
    
        new_params.get_data_file()
        io.collect_data(comm.size, new_params, erase=True, with_real_amps=True, with_voltages=True, benchmark=True)
        # Change some flags in the configuration file.
        new_params.write('whitening', 'temporal', 'False') # Disable temporal filtering
        new_params.write('whitening', 'spatial', 'False') # Disable spatial filtering
        new_params.write('data', 'data_dtype', 'float32') # Set type of the data to float32
        new_params.write('data', 'dtype_offset', 'auto') # Set padding for data to auto
        # Move results from `<dataset>/<dataset>.result.hdf5` to
        # `<dataset>/injected/<dataset>.result.hdf5`.
        
        shutil.move(os.path.join(file_out, data_suff + '.result.hdf5'), os.path.join(result_path, data_suff + '.result.hdf5'))
                
        # Save scalings into `<dataset>/injected/<dataset>.scalings.npy`.
        numpy.save(os.path.join(result_path, data_suff + '.scalings'), scalings)

        file_name_noext, ext = os.path.splitext(file_name)

        # Copy basis from `<dataset>/injected/<dataset>.basis.hdf5` to
        # `<dataset>/<dataset>.basis.hdf5`.
        shutil.copy2(os.path.join(result_path, data_suff + '.basis.hdf5'),
                     os.path.join(file_out, data_suff + '.basis.hdf5'))

        if benchmark not in ['fitting', 'synchrony']:
            # Move templates from `<dataset>/<dataset>.templates.hdf5` to
            # `<dataset>/injected/<dataset>.templates.hdf5`.
            shutil.move(os.path.join(file_out, data_suff + '.templates.hdf5'),
                        os.path.join(result_path, data_suff + '.templates.hdf5'))
def main(argv=None):

    if argv is None:
        argv = sys.argv[1:]

    parallel_hdf5 = h5py.get_config().mpi
    user_path = pjoin(os.path.expanduser('~'), 'spyking-circus')
    tasks_list = None

    if not os.path.exists(user_path):
        os.makedirs(user_path)

    try:
        import cudamat as cmt
        cmt.init()
        HAVE_CUDA = True
    except Exception:
        HAVE_CUDA = False

    all_steps = [
        'whitening', 'clustering', 'fitting', 'gathering', 'extracting',
        'filtering', 'converting', 'deconverting', 'benchmarking',
        'merging', 'validating', 'thresholding'
    ]

    config_file = os.path.abspath(pkg_resources.resource_filename('circus', 'config.params'))

    header = get_colored_header()
    header += Fore.GREEN + 'Local CPUs    : ' + Fore.CYAN + str(psutil.cpu_count()) + '\n'
    # header += Fore.GREEN + 'GPU detected  : ' + Fore.CYAN + str(HAVE_CUDA) + '\n'
    header += Fore.GREEN + 'Parallel HDF5 : ' + Fore.CYAN + str(parallel_hdf5) + '\n'

    do_upgrade = ''
    if not SHARED_MEMORY:
        do_upgrade = Fore.WHITE + '   [please consider upgrading MPI]'

    header += Fore.GREEN + 'Shared memory : ' + Fore.CYAN + str(SHARED_MEMORY) + do_upgrade + '\n'
    header += '\n'
    header += Fore.GREEN + "##################################################################"
    header += Fore.RESET

    method_help = '''by default, all steps are performed,
but a subset x,y can be done. Steps are:
 - filtering
 - whitening
 - clustering
 - fitting
 - merging [with or without a GUI for meta merging]
 - (extra) converting [export results to phy format]
 - (extra) thresholding [to get MUA activity only]
 - (extra) deconverting [import results from phy format]
 - (extra) gathering [force collection of results]
 - (extra) extracting [get templates from spike times]
 - (extra) benchmarking [with -o and -t]
 - (extra) validating [to compare performance with GT neurons]'''
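    # For example, a subset of steps is selected with the -m option, e.g.
    # `spyking-circus mydata.dat -m filtering,whitening` (the file name is
    # hypothetical); the comma-separated list is split and validated against
    # all_steps below.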

    parser = argparse.ArgumentParser(description=header,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('datafile', help='data file (or a list of commands if batch mode)')
    parser.add_argument('-i', '--info', help='list the file formats supported by SpyKING CIRCUS', action='store_true')
    parser.add_argument('-m', '--method',
                        default='filtering,whitening,clustering,fitting,merging',
                        help=method_help)
    parser.add_argument('-c', '--cpu', type=int, default=max(1, int(psutil.cpu_count()/2)), help='number of CPU')
    # parser.add_argument('-g', '--gpu', type=int, default=0, help='number of GPU')
    parser.add_argument('-H', '--hostfile', help='hostfile for MPI',
                        default=pjoin(user_path, 'circus.hosts'))
    parser.add_argument('-b', '--batch', help='datafile is a list of commands to launch, in a batch mode',
                        action='store_true')
    parser.add_argument('-p', '--preview', help='GUI to display the first second filtered with thresholds',
                        action='store_true')
    parser.add_argument('-r', '--result', help='GUI to display the results on top of raw data',
                        action='store_true')
    parser.add_argument('-s', '--second', type=int, default=0, help='If preview mode, beginning of the preview [in s]')
    parser.add_argument('-e', '--extension', help='extension to consider for merging, converting and deconverting',
                        default='None')
    parser.add_argument('-o', '--output', help='output file [for generation of synthetic benchmarks]')
    parser.add_argument('-t', '--type', help='benchmark type',
                        choices=['fitting', 'clustering', 'synchrony'])

    if len(argv) == 0:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args(argv)

    steps = args.method.split(',')
    for step in steps:
        if step not in all_steps:
            print_error(['The method "%s" is not recognized' % step])
            sys.exit(0)

    # To save some typing later
    nb_gpu = 0
    (nb_cpu, hostfile, batch, preview, result, extension, output, benchmark, info, second) = \
        (args.cpu, args.hostfile, args.batch, args.preview, args.result, args.extension, args.output, args.type, args.info, args.second)
    filename = os.path.abspath(args.datafile)
    real_file = filename

    f_next, extens = os.path.splitext(filename)

    if info:
        if args.datafile.lower() in __supported_data_files__:
            filename = 'tmp'
            if len(__supported_data_files__[args.datafile.lower()].extension) > 0:
                filename += __supported_data_files__[args.datafile.lower()].extension[0]

            __supported_data_files__[args.datafile.lower()](filename, {}, is_empty=True)._display_requirements_()
        else:
            print_and_log([
                '',
                'To get info on any particular file format, do:',
                '>> spyking-circus file_format -i',
                ''
            ], 'default')
            print_and_log(list_all_file_format())
        sys.exit(0)

    if extens == '.params':
        print_error(['You should launch the code on the data file!'])
        sys.exit(0)

    file_params = f_next + '.params'
    if not os.path.exists(file_params) and not batch:
        print(Fore.RED + 'The parameter file %s is not present!' % file_params)
        create_params = query_yes_no(Fore.WHITE + "Do you want SpyKING CIRCUS to create a parameter file?")

        if create_params:
            print(Fore.WHITE + "Creating %s" % file_params)
            print(Fore.WHITE + "Fill it properly before launching the code! (see documentation)")
            print_info(['Keep in mind that filtering is performed on site, so please',
                        'be sure to keep a copy of your data elsewhere'])
            shutil.copyfile(config_file, file_params)
        sys.exit(0)
    elif batch:
        tasks_list = filename

    if not batch:
        file_params = f_next + '.params'

        if not os.path.exists(file_params):
            print_and_log(["%s does not exist" % file_params], 'error')
            sys.exit(0)

        import ConfigParser as configparser
        parser = configparser.ConfigParser()
        myfile = open(file_params, 'r')
        lines = myfile.readlines()
        myfile.close()
        myfile = open(file_params, 'w')
        for l in lines:
            myfile.write(l.replace('\t', ''))
        myfile.close()

        parser.read(file_params)

        for section in CircusParser.__all_sections__:
            if parser.has_section(section):
                for (key, value) in parser.items(section):
                    parser.set(section, key, value.split('#')[0].rstrip())
            else:
                parser.add_section(section)

        try:
            use_output_dir = parser.get('data', 'output_dir') != ''
        except Exception:
            use_output_dir = False

        if use_output_dir:
            path = os.path.abspath(os.path.expanduser(parser.get('data', 'output_dir')))
            file_out = os.path.join(path, os.path.basename(f_next))
            if not os.path.exists(file_out):
                os.makedirs(file_out)
        else:
            file_out = f_next


        logfile = file_out + '.log'
        if os.path.exists(logfile):
            os.remove(logfile)

        logger = init_logging(logfile)
        params = CircusParser(filename)
        data_file = params.get_data_file(source=True, has_been_created=False)
        overwrite = params.getboolean('data', 'overwrite')
        file_format = params.get('data', 'file_format')
        if overwrite:
            support_parallel_write = data_file.parallel_write
            is_writable = data_file.is_writable
        else:
            support_parallel_write = __supported_data_files__['raw_binary'].parallel_write
            is_writable = __supported_data_files__['raw_binary'].is_writable

    if preview:
        print_and_log(['Preview mode, showing only seconds [%d-%d] of the recording' % (second, second+1)], 'info', logger)
        tmp_path_loc = os.path.join(os.path.abspath(params.get('data', 'file_out')), 'tmp')

        if not os.path.exists(tmp_path_loc):
            os.makedirs(tmp_path_loc)

        filename = os.path.join(tmp_path_loc, 'preview.dat')
        f_next, extens = os.path.splitext(filename)
        preview_params = f_next + '.params'
        shutil.copyfile(file_params, preview_params)
        steps = ['filtering', 'whitening']

        chunk_size = int(params.rate)

        data_file.open()
        nb_chunks, _ = data_file.analyze(chunk_size)

        if nb_chunks <= (second + 1):
            print_and_log(['Recording is too short to display seconds [%d-%d]' % (second, second+1)])
            sys.exit(0)
        local_chunk = data_file.get_snippet(int(second*params.rate), int(1.2*chunk_size))
        description = data_file.get_description()
        data_file.close()

        new_params = CircusParser(filename, create_folders=False)

        new_params.write('data', 'chunk_size', '1')
        new_params.write('data', 'file_format', 'raw_binary')
        new_params.write('data', 'data_dtype', 'float32')
        new_params.write('data', 'data_offset', '0')
        new_params.write('data', 'dtype_offset', '0')
        new_params.write('data', 'stream_mode', 'None')
        new_params.write('data', 'overwrite', 'True')
        new_params.write('triggers', 'ignore_times', 'False')
        new_params.write('data', 'sampling_rate', str(params.rate))
        new_params.write('whitening', 'safety_time', '0')
        new_params.write('clustering', 'safety_time', '0')
        new_params.write('whitening', 'chunk_size', '1')
        new_params.write('data', 'preview_path', params.file_params)
        new_params.write('data', 'output_dir', '')

        description['data_dtype'] = 'float32'
        description['dtype_offset'] = 0
        description['data_offset'] = 0
        description['gain'] = 1.
        new_params = CircusParser(filename)
        data_file_out = new_params.get_data_file(is_empty=True, params=description)

        support_parallel_write = data_file_out.parallel_write
        is_writable = data_file_out.is_writable

        data_file_out.allocate(shape=local_chunk.shape, data_dtype=numpy.float32)
        data_file_out.open('r+')
        data_file_out.set_data(0, local_chunk)
        data_file_out.close()

    if tasks_list is not None:
        with open(tasks_list, 'r') as f:
            for line in f:
                if len(line) > 0:
                    subprocess.check_call(['spyking-circus'] + line.replace('\n', '').split(" "))
    else:

        print_and_log(['Config file: %s' % (f_next + '.params')], 'debug', logger)
        print_and_log(['Data file  : %s' % filename], 'debug', logger)

        print(get_colored_header())
        print(Fore.GREEN + "File          : " + Fore.CYAN + real_file)
        if preview:
            print(Fore.GREEN + "Steps         : " + Fore.CYAN + "preview mode")
        elif result:
            print(Fore.GREEN + "Steps         : " + Fore.CYAN + "result mode")
        else:
            print(Fore.GREEN + "Steps         : " + Fore.CYAN + ", ".join(steps))
        # print Fore.GREEN + "GPU detected  : ", Fore.CYAN + str(HAVE_CUDA)
        print(Fore.GREEN + "Number of CPU : " + Fore.CYAN + str(nb_cpu) + "/" + str(psutil.cpu_count()))
        # if HAVE_CUDA:
        #     print Fore.GREEN + "Number of GPU : ", Fore.CYAN + str(nb_gpu)
        print(Fore.GREEN + "Parallel HDF5 : " + Fore.CYAN + str(parallel_hdf5))

        do_upgrade = ''
        use_shared_memory = get_shared_memory_flag(params)
        if not SHARED_MEMORY:
            do_upgrade = Fore.WHITE + '   [please consider upgrading MPI]'

        print(Fore.GREEN + "Shared memory : " + Fore.CYAN + str(use_shared_memory) + do_upgrade)
        print(Fore.GREEN + "Hostfile      : " + Fore.CYAN + hostfile)
        print("")
        print(Fore.GREEN + "##################################################################")
        print("")
        print(Fore.RESET)

        # Launch the subtasks
        subtasks = [('filtering', 'mpirun'),
                    ('whitening', 'mpirun'),
                    ('clustering', 'mpirun'),
                    ('fitting', 'mpirun'),
                    ('extracting', 'mpirun'),
                    ('gathering', 'python'),
                    ('converting', 'mpirun'),
                    ('deconverting', 'mpirun'),
                    ('benchmarking', 'mpirun'),
                    ('merging', 'mpirun'),
                    ('validating', 'mpirun'),
                    ('thresholding', 'mpirun')]

        # if HAVE_CUDA and nb_gpu > 0:
        #     use_gpu = 'True'
        # else:
        use_gpu = 'False'

        time = data_stats(params) / 60.0

        if preview:
            params = new_params

        if nb_cpu < psutil.cpu_count():
            if use_gpu != 'True' and not result:
                print_and_log(['Using only %d out of %d local CPUs available (-c to change)' % (nb_cpu, psutil.cpu_count())], 'info', logger)

        if params.getboolean('detection', 'matched-filter') and not params.getboolean('clustering', 'smart_search'):
            print_and_log(['Smart Search should be activated for matched filtering'], 'info', logger)

        if time > 30 and not params.getboolean('clustering', 'smart_search'):
            print_and_log(['Smart Search should be activated for long recordings'], 'info', logger)

        n_edges = get_averaged_n_edges(params)
        if n_edges > 100 and not params.getboolean('clustering', 'compress'):
            print_and_log(['Template compression is highly recommended based on parameters'], 'info', logger)

        if not result:
            for subtask, command in subtasks:
                if subtask in steps:
                    if command == 'python':
                        # Directly call the launcher
                        try:
                            circus.launch(subtask, filename, nb_cpu, nb_gpu, use_gpu)
                        except:
                            print_and_log(['Step "%s" failed!' % subtask], 'error', logger)
                            sys.exit(0)
                    elif command == 'mpirun':
                        # Use mpirun to make the call
                        mpi_args = gather_mpi_arguments(hostfile, params)
                        one_cpu = False

                        if subtask in ['filtering', 'benchmarking'] and not is_writable:
                            if not preview and overwrite:
                                print_and_log(['The file format %s is read only!' % file_format,
                                               'You should set overwrite to False, to create a copy of the data.',
                                               'However, note that if you have streams, information on times',
                                               'will be discarded'], 'info', logger)
                                sys.exit(0)

                        if subtask in ['filtering'] and not support_parallel_write and (args.cpu > 1):
                            print_and_log(['No parallel writes for %s: only 1 node used for %s' %(file_format, subtask)], 'info', logger)
                            nb_tasks = str(1)
                            one_cpu = True

                        else:
                            if subtask != 'fitting':
                                nb_tasks = str(args.cpu)
                            else:
                                # if use_gpu == 'True':
                                #     nb_tasks = str(args.gpu)
                                # else:
                                nb_tasks = str(args.cpu)

                        if subtask == 'benchmarking':
                            if (output is None) or (benchmark is None):
                                print_and_log(["To generate synthetic datasets, you must provide output and type"], 'error', logger)
                                sys.exit(0)
                            mpi_args += [
                                '-np', nb_tasks, 'spyking-circus-subtask',
                                subtask, filename, str(nb_cpu), str(nb_gpu),
                                use_gpu, output, benchmark
                            ]
                        elif subtask in ['merging', 'converting']:
                            mpi_args += [
                                '-np', nb_tasks, 'spyking-circus-subtask',
                                subtask, filename, str(nb_cpu), str(nb_gpu),
                                use_gpu, extension
                            ]
                        elif subtask in ['deconverting']:
                            nb_tasks = str(1)
                            nb_cpu = 1
                            mpi_args += [
                                '-np', nb_tasks, 'spyking-circus-subtask', subtask,
                                filename, str(nb_cpu), str(nb_gpu), use_gpu,
                                extension
                            ]
                        else:
                            mpi_args += [
                                '-np', nb_tasks, 'spyking-circus-subtask',
                                subtask, filename, str(nb_cpu), str(nb_gpu),
                                use_gpu, str(one_cpu)
                            ]

                        print_and_log(['Launching task %s' % subtask], 'debug', logger)
                        print_and_log(['Command: %s' % str(mpi_args)], 'debug', logger)

                        try:
                            subprocess.check_call(mpi_args)
                        except subprocess.CalledProcessError as e:
                            print_and_log(['Step "%s" failed for reason %s!' % (subtask, e)], 'error', logger)
                            sys.exit(0)

    if preview or result:
        from circus.shared import gui
        import pylab
        try:
            from PyQt5.QtWidgets import QApplication
        except ImportError:
            from matplotlib.backends import qt_compat
            use_pyside = qt_compat.QT_API == qt_compat.QT_API_PYSIDE
            if use_pyside:
                from PySide.QtGui import QApplication
            else:
                from PyQt4.QtGui import QApplication
        app = QApplication([])
        try:
            pylab.style.use('ggplot')
        except Exception:
            pass

        if preview:
            print_and_log(['Launching the preview GUI...'], 'debug', logger)
            mygui = gui.PreviewGUI(new_params)
            shutil.rmtree(tmp_path_loc)
        elif result:
            data_file = params.get_data_file()
            print_and_log(['Launching the result GUI...'], 'debug', logger)
            mygui = gui.PreviewGUI(params, show_fit=True)
        sys.exit(app.exec_())
def get_dataset(self):
    dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
    filename = os.path.join(dirname, 'data')
    if not os.path.exists(filename):
        os.makedirs(filename)
    result = os.path.join(filename, 'data')
    filename = os.path.join(filename, 'data.dat')
    if not os.path.exists(filename):
        print "Generating a synthetic dataset of 4 channels, 1min at 20kHz..."
        sampling_rate = 20000
        N_total = 4
        gain = 0.5
        data = (gain *
                numpy.random.randn(sampling_rate * N_total * 1 * 60)).astype(
                    numpy.float32)
        myfile = open(filename, 'wb')
        myfile.write(data.tostring())
        myfile.close()

    src_path = os.path.abspath(os.path.join(dirname, 'snippet'))

    if not os.path.exists(result):
        os.makedirs(result)
        shutil.copy(os.path.join(src_path, 'test.basis.hdf5'),
                    os.path.join(result, 'data.basis.hdf5'))
        shutil.copy(os.path.join(src_path, 'test.templates.hdf5'),
                    os.path.join(result, 'data.templates.hdf5'))
        shutil.copy(os.path.join(src_path, 'test.clusters.hdf5'),
                    os.path.join(result, 'data.clusters.hdf5'))

    config_file = os.path.abspath(
        pkg_resources.resource_filename('circus', 'config.params'))
    file_params = os.path.abspath(filename.replace('.dat', '.params'))
    if not os.path.exists(file_params):

        shutil.copyfile(config_file, file_params)
        probe_file = os.path.join(src_path, 'test.prb')
        parser = CircusParser(filename, mapping=probe_file)
        parser.write('data', 'file_format', 'raw_binary')
        parser.write('data', 'data_offset', '0')
        parser.write('data', 'data_dtype', 'float32')
        parser.write('data', 'sampling_rate', '20000')
        parser.write('whitening', 'temporal', 'False')
        parser.write('data', 'mapping', probe_file)
        parser.write('clustering', 'make_plots', 'png')
        parser.write('clustering', 'nb_repeats', '3')
        parser.write('detection', 'N_t', '3')
        parser.write('clustering', 'smart_search', 'False')
        parser.write('clustering', 'max_elts', '10000')
        parser.write('noedits', 'filter_done', 'True')
        parser.write('clustering', 'extraction', 'median-raw')

    a, b = os.path.splitext(os.path.basename(filename))
    c, d = os.path.splitext(filename)
    file_out = os.path.join(os.path.abspath(c), a)

    return filename
class TestClustering(unittest.TestCase):
    def setUp(self):
        self.all_matches = None
        self.all_templates = None
        dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name = os.path.join(self.path, 'clustering.dat')
        self.source_dataset = get_dataset(self)
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False',
                       self.file_name, 'clustering', 1)
            mpi_launch('whitening', self.file_name, 2, 0, 'False')

        self.parser = CircusParser(self.file_name)
        self.parser.write('clustering', 'max_elts', '1000')

    def test_clustering_one_CPU(self):
        mpi_launch('clustering', self.file_name, 1, 0, 'False')
        res = get_performance(self.file_name, 'one_CPU')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_clustering_two_CPU(self):
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        res = get_performance(self.file_name, 'two_CPU')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_clustering_pca(self):
        self.parser.write('clustering', 'extraction', 'median-pca')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'extraction', 'median-raw')
        res = get_performance(self.file_name, 'median-pca')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_clustering_nb_passes(self):
        self.parser.write('clustering', 'nb_repeats', '1')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'nb_repeats', '3')
        res = get_performance(self.file_name, 'nb_passes')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_clustering_sim_same_elec(self):
        self.parser.write('clustering', 'sim_same_elec', '5')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'sim_same_elec', '3')
        res = get_performance(self.file_name, 'sim_same_elec')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_clustering_cc_merge(self):
        self.parser.write('clustering', 'cc_merge', '0.8')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'cc_merge', '0.95')
        res = get_performance(self.file_name, 'cc_merge')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]

    def test_remove_mixtures(self):
        self.parser.write('clustering', 'remove_mixtures', 'False')
        mpi_launch('clustering', self.file_name, 2, 0, 'False')
        self.parser.write('clustering', 'remove_mixtures', 'True')
        res = get_performance(self.file_name, 'remove_mixtures')
        if self.all_templates is None:
            self.all_templates = res[0]
            self.all_matches = res[1]
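# The tests above all repeat the same pattern: override one clustering
# parameter, launch the step, then restore the previous value.  A minimal
# sketch of how that pattern could be factored out; 'temporary_param' is an
# illustrative helper, not part of the original suite (contextlib is standard
# library).
from contextlib import contextmanager

@contextmanager
def temporary_param(parser, section, key, value, restore):
    """Write `value`, yield for the launch, then restore the old setting."""
    parser.write(section, key, value)
    try:
        yield parser
    finally:
        parser.write(section, key, restore)

# Usage sketch, mirroring test_clustering_cc_merge above:
#     with temporary_param(self.parser, 'clustering', 'cc_merge', '0.8', '0.95'):
#         mpi_launch('clustering', self.file_name, 2, 0, 'False')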
Example #9
class TestFitting(unittest.TestCase):
    def setUp(self):
        self.all_spikes = None
        self.max_chunk = '100'
        dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), '.'))
        self.path = os.path.join(dirname, 'synthetic')
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        self.file_name = os.path.join(self.path, 'fitting.dat')
        self.source_dataset = get_dataset(self)
        if not os.path.exists(self.file_name):
            mpi_launch('benchmarking', self.source_dataset, 2, 0, 'False',
                       self.file_name, 'fitting', 1)
            mpi_launch('whitening', self.file_name, 2, 0, 'False')
        self.parser = CircusParser(self.file_name)

    def test_fitting_one_CPU(self):
        self.parser.write('fitting', 'max_chunk', self.max_chunk)
        mpi_launch('fitting', self.file_name, 1, 0, 'False')
        self.parser.write('fitting', 'max_chunk', 'inf')
        res = get_performance(self.file_name, 'one_CPU')
        if self.all_spikes is None:
            self.all_spikes = res
        assert numpy.all(self.all_spikes == res)

    def test_fitting_two_CPUs(self):
        self.parser.write('fitting', 'max_chunk', self.max_chunk)
        mpi_launch('fitting', self.file_name, 2, 0, 'False')
        self.parser.write('fitting', 'max_chunk', 'inf')
        res = get_performance(self.file_name, 'two_CPU')
        if self.all_spikes is None:
            self.all_spikes = res
        assert numpy.all(self.all_spikes == res)

    def test_fitting_one_GPU(self):
        HAVE_CUDA = False
        try:
            import cudamat
            HAVE_CUDA = True
        except ImportError:
            pass
        if HAVE_CUDA:
            self.parser.write('fitting', 'max_chunk', self.max_chunk)
            mpi_launch('fitting', self.file_name, 1, 0, 'False')
            self.parser.write('fitting', 'max_chunk', 'inf')
            res = get_performance(self.file_name, 'one_GPU')
            if self.all_spikes is None:
                self.all_spikes = res
            assert numpy.all(self.all_spikes == res)

    def test_fitting_large_chunks(self):
        self.parser.write('fitting', 'chunk_size', '1')
        self.parser.write('fitting', 'max_chunk',
                          str(int(self.max_chunk) // 2))
        mpi_launch('fitting', self.file_name, 2, 0, 'False')
        self.parser.write('fitting', 'max_chunk', 'inf')
        self.parser.write('fitting', 'chunk_size', '0.5')
        res = get_performance(self.file_name, 'large_chunks')
        if self.all_spikes is None:
            self.all_spikes = res
        assert numpy.all(self.all_spikes == res)
Example #10
class Circus:
    """ Run Spyking Circus"""

    def __init__(self, paths, main_params, sensors):
        '''
        Set the minimum startup environment for Spyking Circus

        Parameters
        ----------
        paths : dictionary
            All necessary folders and files:
                -- paths['case']
                -- paths['npy_file']
                -- paths['SPC']
                -- paths['SPC_output']
                -- paths['SPC_params']
                -- paths['circus_updates']
                -- paths['circus_pkg']
        main_params : dictionary
            Some parameters that are useful for setting:
                -- 'N_t'
                -- 'cut_off'
                -- 'cc_merge'
        sensors : str
            'grad' or 'mag'

        '''
        import numpy as np
        from shutil import copyfile
        ### Set paths and parameters
        self.main_params = main_params        
        self.case        = paths['case']
        self.npy_file    = paths['npy_file']
        self.sensors     = sensors
        self.output      = paths['SPC_output']
        self.path_params = paths['SPC_params']
        
        ### Update circus package
        self._update_circus_package_for_meg(paths['circus_updates'], 
                                            paths['circus_pkg'])
                
        ### Copy files
        copyfile(paths['circus_updates']/'config.params', paths['SPC_params'])
        copyfile(paths['circus_updates']/'meg_306.prb', paths['SPC']/'meg_306.prb')
        grad_idx = paths['circus_updates']/'grad_sensors_idx.npy'
        mag_idx  = paths['circus_updates']/'mag_sensors_idx.npy'
        copyfile(grad_idx, paths['SPC']/'grad_idx.npy')
        copyfile(mag_idx, paths['SPC']/'mag_idx.npy')
        
        ### Load sensors indexes
        self.main_params['grad_idx']    = str(np.load(grad_idx).tolist())
        self.main_params['mag_idx']     = str(np.load(mag_idx).tolist())
        self.main_params['stream_mode'] = 'None' #'multi-files'
        
 
    def _update_circus_package_for_meg(self, code_source, circus_pkg):
        '''
        Updates the circus package version to use additional features:
            -- SPLINE
            -- fitting process
            -- probe file
            -- param file

        Parameters
        ----------
        code_source : pathlib.PosixPath
            The place where the modified files are located
        circus_pkg : pathlib.PosixPath
            Path to the package Spyking Circus
        '''
        from shutil import copyfile
        
        dst = circus_pkg
        copyfile(code_source / 'config.params', dst / 'config.params')
        copyfile(code_source / 'meg_306.prb', dst / 'meg_306.prb')
        copyfile(code_source / 'clustering.py', dst / 'clustering.py')
        copyfile(code_source / 'fitting.py', dst / 'fitting.py')
        copyfile(code_source / 'parser.py', dst / 'shared' / 'parser.py')
        copyfile(code_source / 'algorithms.py', dst / 'shared' / 'algorithms.py')

    def set_params_spc(self, main_params, npy_file, output):
        '''
        Set parameters file for Spyking Circus

        Parameters
        ----------
        main_params : dictionary
            Some parameters that are useful for setting:
                -- 'N_t'
                -- 'cut_off'
                -- 'stream_mode'
                -- dead_channels ('grad_idx'/'mag_idx')
                -- 'cc_merge'
        npy_file : pathlib.PosixPath
            The path to the numpy data file in the CIRCUS folder
        output : pathlib.PosixPath
            The directory where the results will be saved. 
            Different for magnetometers and gradiometers
        '''
        from shutil import copyfile
        from circus.shared.parser import CircusParser

        self.params = CircusParser(npy_file, create_folders=False)
        ### data
        self.params.write('data','file_format','numpy')
        self.params.write('data','stream_mode', main_params['stream_mode'])
        self.params.write('data','mapping', str(output.parent / 'meg_306.prb'))
        self.params.write('data','output_dir',str(output))
        self.params.write('data','sampling_rate','1000')
        ### detection
        self.params.write('detection','radius', '6')
        self.params.write('detection','N_t', str(main_params['N_t']))
        #self.params.write('detection','spike_thresh', '6')
        self.params.write('detection','peaks','both')
        self.params.write('detection','alignment','False')
        self.params.write('detection','isolation','False')
        if self.sensors == 'mag':
            # keep magnetometers: mark gradiometer channels as dead
            grad = '{ 1 : %s}'%main_params['grad_idx']
            self.params.write('detection','dead_channels', grad)
        else:
            # keep gradiometers: mark magnetometer channels as dead
            mag = '{ 1 : %s}'%main_params['mag_idx']
            self.params.write('detection','dead_channels', mag)
        ### filtering
        filt_param = '{}, {}'.format(main_params['cut_off'][0], 
                                     main_params['cut_off'][1])
        self.params.write('filtering','cut_off',filt_param)
        ### whitening
        self.params.write('whitening','safety_time','auto')
        self.params.write('whitening','max_elts','10000')
        self.params.write('whitening','nb_elts','0.1')
        self.params.write('whitening','spatial','False')
        ### clustering
        self.params.write('clustering','extraction','mean-raw')
        self.params.write('clustering','safety_space','False')
        self.params.write('clustering','safety_time','1')
        self.params.write('clustering','max_elts','10000')
        self.params.write('clustering','nb_elts','0.001')
        self.params.write('clustering','nclus_min','0.0001')
        self.params.write('clustering','smart_search','False')
        self.params.write('clustering','sim_same_elec','1')
        self.params.write('clustering','sensitivity','5')
        self.params.write('clustering','cc_merge', str(main_params['cc_merge']))
        self.params.write('clustering','dispersion','(5, 5)')
        self.params.write('clustering','noise_thr','0.9')
        #self.params.write('clustering','remove_mixture','False')
        self.params.write('clustering','cc_mixtures','0.1')
        self.params.write('clustering','make_plots','png')
        ### fitting
        self.params.write('fitting','chunk_size','60')
        self.params.write('fitting','amp_limits','(0.01,10)')
        self.params.write('fitting','amp_auto','False')
        self.params.write('fitting','collect_all','True')
        ### merging
        self.params.write('merging','cc_overlap','0.4')
        self.params.write('merging','cc_bin','200')
        
        # Re-parse so that self.params reflects the values just written to the .params file.
        self.params = CircusParser(npy_file, create_folders=False)
        copyfile(self.path_params, output / 'config.param')
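    # Hedged sketch (illustrative values, not from the original code): the
    # 'dead_channels' entry written above maps probe channel group 1 to the
    # channel indices to ignore, built from the index arrays loaded in
    # __init__, e.g.
    #     grad_idx = str(np.load('grad_sensors_idx.npy').tolist())  # '[0, 1, 3, ...]'
    #     dead     = '{ 1 : %s}' % grad_idx                         # '{ 1 : [0, 1, 3, ...]}'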

    def _run_command_and_print_output(self, command):
        from subprocess import Popen, PIPE
        output = []
        errors = []
        #command_list = shlex.split(command, posix="win" not in sys.platform)
        #command_list, shell=False
        with Popen(command, stdout=PIPE, stderr=PIPE, shell=True) as process:
            while True:
                output_stdout = process.stdout.readline()
                output_stderr = process.stderr.readline()
                if (not output_stdout) and (not output_stderr) and (process.poll() is not None):
                    break
                if output_stdout:
                    output.append(output_stdout.decode())
                if output_stderr:
                    errors.append(output_stderr.decode())
            rc = process.poll()
            return rc, output, errors
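    # Hedged usage sketch (illustrative command string): the helper collects
    # stdout/stderr line by line while the process runs and returns
    # (return code, stdout lines, stderr lines), e.g.
    #     rc, out, err = self._run_command_and_print_output(
    #         'spyking-circus recording.npy -m filtering -c 4')
    #     if rc != 0 or err:
    #         ...  # write 'err' out with self._out_in_file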
        
    def _out_in_file(self, file, out):
        '''
        Write log information to a file.
        
        '''
        with open(file, 'w') as f:
            for item in out:
                f.write(item)

    def run_circus(self, output, file_npy, n_cores=4, only_fitting=False, multi=False):
        '''
        Run Spyking Circus

        Parameters
        ----------
        output : pathlib.PosixPath
            The directory where the results will be saved. 
            Different for magnetometers and gradiometers
        file_npy : pathlib.PosixPath
            The path to the numpy data file in the CIRCUS folder
        n_cores : int, optional
            Number of processor cores. The default is 4.
        only_fitting : bool, optional
            Run only fitting step. The default is False.
        multi : bool, optional
            Save results to different files if 'stream_mode' is 'multi-files'.
            The default is False.

        '''
        # Both branches launch the same CLI, e.g.
        # "spyking-circus <file_npy> -m filtering,whitening,clustering,fitting -c <n_cores>";
        # only the list of steps differs.
        if only_fitting:
            methods = 'filtering,fitting'
        else:
            methods = 'filtering,whitening,clustering,fitting'
        cmd = 'spyking-circus %s -m %s -c %s'%(str(file_npy), methods, n_cores)
        cmd_multi = 'circus-multi %s'%(str(file_npy))
        
        p, out, err = self._run_command_and_print_output(cmd)
        self._out_in_file(output / 'output_log.txt', out)
        if err != []:
            self._out_in_file(output / 'error_log.txt', err) 
        if multi:
            p, out_m, err_m = self._run_command_and_print_output(cmd_multi)
            if err_m != []:
                self._out_in_file(output / 'err_multi_log.txt', err_m)
   
    def results_to_excel(self, circus_params, path_save_results):
        '''
        Convert results to an Excel table.

        Parameters
        ----------
        circus_params : circus.shared.parser.CircusParser 
            self.params
        path_save_results : pathlib.PosixPath
            The directory where the results will be saved. 

        '''
        import pandas as pd
        from circus.shared.files import load_data
        results = load_data(circus_params, 'results')
        frames = []
        for key in results['spiketimes'].keys():
            sp = results['spiketimes'][key]
            amp = results['amplitudes'][key][:, 0]
            frames.append(pd.DataFrame(data={'Spiketimes': sp,
                                             'Amplitudes': amp,
                                             'Template': key}))
        templates = pd.concat(frames, ignore_index=True)
        file = path_save_results / 'Templates_{}.xlsx'.format(self.sensors)
        templates.to_excel(file, index=False)
        del templates, results
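# Hedged end-to-end sketch (not part of the original class): how the pieces
# above could be driven for a single recording.  All concrete paths and
# parameter values below are illustrative; the 'paths' keys mirror the ones
# documented in Circus.__init__.
def run_spc_for_meg_example():
    from pathlib import Path

    base = Path('/data/case_01/CIRCUS')             # hypothetical case folder
    paths = {
        'case': base,
        'npy_file': base / 'rest_raw.npy',          # hypothetical recording
        'SPC': base,
        'SPC_output': base / 'SPC_mag',
        'SPC_params': base / 'rest_raw.params',
        'circus_updates': base / 'circus_updates',  # folder with the modified files
        'circus_pkg': Path('/path/to/site-packages/circus'),
    }
    main_params = {'N_t': 80, 'cut_off': (2, 90), 'cc_merge': 0.9}

    spc = Circus(paths, main_params, sensors='mag')
    spc.set_params_spc(spc.main_params, paths['npy_file'], paths['SPC_output'])
    spc.run_circus(paths['SPC_output'], paths['npy_file'], n_cores=4)
    spc.results_to_excel(spc.params, paths['SPC_output'])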