Example #1
def get_dest_arf(filename, dry_run):
    """Returns handle for destination arf file"""
    if dry_run:
        fp = arf.open_file(filename + ".arf", mode="a",
                           driver="core", backing_store=False)
    else:
        fp = arf.open_file(filename + ".arf", mode="w-")
        arf.set_attributes(
            fp, file_creator='org.meliza.arfx/arfxplog ' + core.__version__)
        log.info("opened '%s.arf' for writing", filename)
    return fp
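A brief usage sketch (hypothetical caller; the file name is a placeholder): with dry_run the handle is an in-memory file that is discarded on close, otherwise '<filename>.arf' is created on disk.

# hypothetical usage of get_dest_arf above; "recording01" is a placeholder name
fp = get_dest_arf("recording01", dry_run=True)   # in-memory only, nothing written
# ... create entries and datasets on fp ...
fp.close()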
Example #2
File: test_arf.py Project: pmalonis/arf
def test06_creation_iter():
    fp = arf.open_file("test06", mode="a", driver="core", backing_store=False)
    entry_names = ('z', 'y', 'a', 'q', 'zzyfij')
    for name in entry_names:
        g = arf.create_entry(fp, name, 0)
        arf.create_dataset(g, "dset", (), sampling_rate=1)
    assert_sequence_equal(arf.keys_by_creation(fp), entry_names)
Example #3
def test07_append_to_table():
    fp = arf.open_file("test07", mode="a", driver="core", backing_store=False)
    dtype = nx.dtype({'names': ("f1","f2"), 'formats': [nx.uint, nx.int32]})
    dset = arf.create_table(fp, 'test', dtype=dtype)
    assert_equal(dset.shape[0], 0)
    arf.append_data(dset, (5, 10))
    assert_equal(dset.shape[0], 1)
Example #4
def createtemparf(filename, datatype=0):
    root, ext = os.path.splitext(filename)
    arffile = arf.open_file(tempfile.mktemp())
    if ext == '.lbl':
        lbl_rec = lbl.read(filename)
        print(lbl_rec)
        dset = arf.create_dataset(arffile, os.path.split(filename)[-1],
                                  lbl_rec, units=['','s','s'], datatype=2002)
        dset.attrs['units'] = 's'
    elif ext == '.wav':
        wavfile = ewave.open(filename)        
        arf.create_dataset(arffile, os.path.split(filename)[-1], wavfile.read(),
                           sampling_rate=wavfile.sampling_rate, datatype=1)
    elif ext == '.pcm':
        from arfx import pcmio
        pcmfile = pcmio.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], pcmfile.read(),
                           sampling_rate=pcmfile.sampling_rate, datatype=datatype)
    elif ext == '.pcm_seq2':
        from arfx import io
        pcmseqfile = io.open(filename)
        dataset_basename = os.path.split(filename)[-1]
        for i in range(pcmseqfile.nentries):
            dataset_name = '_'.join([dataset_basename, str(i)])
            arf.create_dataset(arffile, dataset_name, pcmseqfile.read(),
                               sampling_rate=pcmseqfile.sampling_rate, 
                               timestamp=pcmseqfile.timestamp, datatype=datatype)
            #try block added because pcmseqfile.nentries doesn't seem to always be accurate
            try:
                pcmseqfile.entry += 1
            except ValueError:
                continue

    return arffile['/']
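A hedged usage sketch (the .wav path is a placeholder): the function returns the root group of a throwaway ARF file containing one dataset named after the source file.

# hypothetical call; 'song.wav' is a placeholder path
root = createtemparf('song.wav')
print(list(root.keys()))   # expect a single dataset named 'song.wav'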
Example #5
    def save(stream, filename, path, sampling_rate=None, chunk_size=None):
        """ Saves a Stream object to an .arf file.
            Can't be called by an instance of ArfStreamer.
        """
        if chunk_size == None:
            chunk_size = stream.chunk_size

        if sampling_rate == None:
            raise Exception("You must specify the sampling rate in ArfStreamer.save")
        
        with arf.open_file(filename, 'a') as file:
            path = path.split("/")
            dst_name = path[-1]
            grp_path = "/".join(path[:-1])
            grp = file.require_group(grp_path)            
            #Get first batch of data
            data = stream.read(chunk_size)
            try:
                dst = arf.create_dataset(grp, dst_name, data,
                    maxshape=(None,), sampling_rate=sampling_rate)
            except Exception:
                raise ValueError(
                    "could not create dataset '{}'; one with that name may already exist".format(dst_name))
            while True:
                data = stream.read(chunk_size)
                if len(data) == 0:
                    break
                arf.append_data(dst, data)
            file.flush()
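A usage sketch based on the signature above; the stream object, file name, path, and rate are assumptions, and sampling_rate must be given.

# hypothetical usage; 'stream' is assumed to expose read() and chunk_size
ArfStreamer.save(stream, "session.arf", "/pen1/site1/raw", sampling_rate=30000)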
Example #6
def arf2bark(arf_file,
             root_path,
             timezone,
             verbose,
             mangle_prefix=ENTRY_PREFIX):
    with arf.open_file(arf_file, 'r') as af:
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                for pos_arg in ('name', 'parents'):
                    # along with 'timestamp' below, these are positional arguments to create_entry
                    # for now, I prefer hard-coding them over messing with runtime introspection
                    new_name = pos_arg
                    while new_name in entry_attrs:
                        new_name = '{}_{}'.format(mangle_prefix, new_name)
                    try:
                        entry_attrs[new_name] = entry_attrs.pop(pos_arg)
                    except KeyError:
                        pass
                    else:
                        if verbose:
                            print('Renamed attribute {} of entry {} to {}'.
                                  format(pos_arg, ename, new_name))
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if arf.is_time_series(entry) or arf.is_marked_pointproc(entry):
                    if tle is None:
                        path = os.path.join(root_path, 'top_level')
                        tle = bark.create_entry(path, 0, parents=False).path
                    transfer_dset(ename, entry, tle, verbose)
                else:
                    unknown_ds_warning(ename)  # and skip, w/o creating TLE
        if found_trigin:
            print('Warning: found datasets named "trig_in". Jill-created ' +
                  '"trig_in" datasets segfault when read, so these datasets' +
                  ' were skipped. If you know the datasets are good, rename' +
                  ' them and try again.')
    return bark.Root(root_path)
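A hedged invocation sketch (paths and timezone are placeholders); note that the converter creates root_path with os.mkdir, so that directory must not already exist.

# hypothetical call; creates the directory 'experiment1_root' and returns a bark.Root
root = arf2bark('experiment1.arf', 'experiment1_root',
                timezone='America/Chicago', verbose=True)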
Example #7
def main(kwikfile, datatypes, arf_name):
    if not datatypes:
        datatypes = [0]
    if not arf_name:
        arf_name = os.path.splitext(kwikfile)[0] + ".arf"
    with h5py.File(kwikfile, "r") as kfile, arf.open_file(arf_name, "w") as afile:
        copy(kfile, afile, datatypes)
Example #8
def main(kwikfiles, datatypes):
    if not datatypes:
        datatypes = [0]
    for kwikfile in kwikfiles:
        arf_name = os.path.splitext(kwikfile)[0] + ".arf"
        with h5py.File(kwikfile, "r") as kfile, arf.open_file(arf_name,
                                                              "w") as afile:
            copy(kfile, afile, datatypes)
Example #9
def arf2bark(arf_file, root_path, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items(): # entries and top-level datasets
            if isinstance(entry, h5py.Group): # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                # rename 'name' attribute created by openephys arf module
                try:
                    entry_attrs['openephys_name'] = entry_attrs.pop('name')
                except KeyError:
                    pass
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)

                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items(): # entry-level datasets
                    if ds_name == 'trig_in': # accessing trig_in -> segfault
                        found_trigin = True # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset): # top-level datasets
                if arf.is_time_series(entry) or arf.is_marked_pointproc(entry):
                    if tle is None:
                        path = os.path.join(root_path, 'top_level')
                        tle = bark.create_entry(path, 0, parents=False).path
                    transfer_dset(ename, entry, tle, verbose)
                else:
                    unknown_ds_warning(ename) # and skip, w/o creating TLE
        if found_trigin:
            print('Warning: found datasets named "trig_in". Jill-created ' +
                  '"trig_in" datasets segfault when read, so these datasets' +
                  ' were skipped. If you know the datasets are good, rename' +
                  ' them and try again.')
    return bark.Root(root_path)
Example #10
def createtemparf(filename, datatype=0):
    root, ext = os.path.splitext(filename)
    arffile = arf.open_file(tempfile.mktemp())
    if ext == '.lbl':
        lbl_rec = lbl.read(filename)
        print(lbl_rec)
        dset = arf.create_dataset(arffile, os.path.split(filename)[-1],
                                  lbl_rec, units=['','s','s'], datatype=2002)
        dset.attrs['units'] = 's'
    elif ext == '.wav':
        wavfile = ewave.open(filename)        
        arf.create_dataset(arffile, os.path.split(filename)[-1], wavfile.read(),
                           sampling_rate=wavfile.sampling_rate, datatype=1)
    elif ext == '.pcm':
        from arfx import pcmio
        pcmfile = pcmio.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], pcmfile.read(),
                           sampling_rate=pcmfile.sampling_rate, datatype=datatype)
    return arffile['/']
Example #11
def arf2bark(arf_file, root_parent, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        # root
        root_dirname = os.path.splitext(arf_file)[0]
        root_path = os.path.join(os.path.abspath(root_parent), root_dirname)
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if tle is None:
                    path = os.path.join(root_path, 'top_level')
                    tle = bark.create_entry(path, 0, parents=False).path
                transfer_dset(ename, entry, tle, verbose)
        if found_trigin:
            print('Warning: found datasets named "trig_in". Jill-created ' +
                  '"trig_in" datasets segfault when read, so these datasets' +
                  ' were skipped. If you know the datasets are good, rename' +
                  ' them and try again.')
    return bark.Root(root_path)
Example #12
    used_files = [args.kwik, args.arf]
    if args.probe:
        used_files.append(args.probe)
    for f in used_files:
        if f and not os.path.isfile(f):
            raise IOError('no such file: {}'.format(f))
    if not args.out:
        spikes_filename = os.path.splitext(os.path
                                           .split(args.arf_list[0])[-1])[0] \
            + '_spikes.arf'
    else:
        spikes_filename = args.out

    start_sample = args.start_sample # defaults to 0
    for arf_name in args.arf_list:
        with h5py.File(arf_name, 'r') as arf_file,\
             arf.open_file(spikes_filename, 'w') as spikes_file:
            if args.kwik is not None:
                with h5py.File(args.kwik, 'r') as kwik_file:
                    start_sample = main(kwik_file, arf_file, spikes_file,
                                        args.stim, args.lfp, args.pulse,
                                        args.stimchannel, args.probe,
                                        start_sample=start_sample)
            else:
                start_sample = main(None, arf_file, spikes_file,
                                    args.stim, args.lfp, args.pulse,
                                    args.stimchannel, args.probe,
                                    start_sample=start_sample)
    print("final sample: {}".format(start_sample))
Example #13
def test08_check_file_version():
    fp = arf.open_file("test08", mode="a", driver="core", backing_store=False)
    arf.check_file_version(fp)
Example #14
    def __enter__(self):
        self.file = arf.open_file(self.filename)
        return self
Example #15
File: test_arf.py Project: pmalonis/arf
def test07_append_to_table():
    fp = arf.open_file("test07", mode="a", driver="core", backing_store=False)
    dset = arf.create_table(fp, 'test', dtype=nx.dtype([('f1', nx.uint), ('f2', nx.int32)]))
    assert_equal(dset.shape[0], 0)
    arf.append_data(dset, (5, 10))
    assert_equal(dset.shape[0], 1)
Example #16
File: collect.py Project: melizalab/arfx
def collect_sampled_script(argv=None):
    from natsort import natsorted
    import argparse
    p = argparse.ArgumentParser(prog="arfx-collect-sampled",
                                description="collect sampled data from arf files across channels and entries")
    p.add_argument('--version', action="version",
                   version="%(prog)s " + __version__)
    p.add_argument('-v', '--verbose', help="show verbose log messages", action="store_true")

    p.add_argument("-d", "--dtype", help="convert data to specified type (default is to use as stored)")
    # p.add_argument("-b", "--bark", help="output bark meta.yml file", action="store_true")
    p.add_argument("-c", "--channels", help="list of channels to unpack (default all)",
                   metavar='CHANNEL', nargs="+")
    p.add_argument('-e', '--entries', help="list of entries to unpack (default all)",
                   metavar='ENTRY', nargs='+')

    p.add_argument("arffile", help="the ARF file to unpack")
    p.add_argument("outfile", help="the output file (will be overwritten)")

    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)  # change
    ch.setFormatter(formatter)
    log.addHandler(ch)

    with arf.open_file(args.arffile, "r") as arfp:
        log.info("unpacking '%s'", args.arffile)
        arf.check_file_version(arfp)
        entry_names, channel_props = check_entry_consistency(arfp,
                                                             args.entries,
                                                             args.channels,
                                                             predicate=arf.is_time_series)
        if not all_items_equal(channel_props, operator.itemgetter("sampling_rate")):
            log.warning(" - warning: not all datasets have the same sampling rate")
        if not all_items_equal(channel_props, operator.itemgetter("units")):
            log.warning(" - warning: not all datasets have the same units")
        nentries = len(entry_names)
        nchannels = sum(channel_props[c]["channels"] for c in channel_props)
        sampling_rate = first(channel_props, operator.itemgetter("sampling_rate"))
        if args.dtype is None:
            dtype = first(channel_props, operator.itemgetter("dtype"))
        else:
            dtype = args.dtype
        log.info(" - channels (%d):", nchannels)
        for cname in natsorted(channel_props):
            log.info("    - %s", cname)
        log.info("opening '%s' for output", args.outfile)
        log.info(" - sampling rate = %f", sampling_rate)
        log.info(" - dtype = '%s'", dtype)
        log.info(" - entries (%d):", nentries)
        with io.open(args.outfile, mode="w",
                     sampling_rate=sampling_rate, dtype=dtype, nchannels=nchannels) as ofp:
            for entry_name in natsorted(entry_names):
                entry = arfp[entry_name]
                # nsamples = first(entry, operator.attrgetter("shape"))[0]
                # would be more efficient to preallocate but this is easy
                data = np.column_stack([entry[cname][:] for cname in natsorted(channel_props)])
                ofp.write(data)
                log.info("    - '%s' -> %d samples", entry_name, data.shape[0])
Example #17
File: test_arf.py Project: pmalonis/arf
# -*- coding: utf-8 -*-
# -*- mode: python -*-

# test harness for arf interface. assumes the underlying hdf5 and h5py libraries
# are working.

from nose.tools import *
from nose.plugins.skip import SkipTest
from distutils import version

import numpy as nx
import arf
import time
from numpy.random import randn, randint

fp = arf.open_file("test", 'w', driver="core", backing_store=False)
entry_base = "entry_%03d"
tstamp = time.mktime(time.localtime())
entry_attributes = {'intattr': 1,
                    'vecattr': [1, 2, 3],
                    'arrattr': randn(5),
                    'strattr': "an attribute",
                    }
datasets = [dict(name="acoustic",
                 data=randn(100000),
                 sampling_rate=20000,
                 datatype=arf.DataTypes.ACOUSTIC,
                 maxshape=(None,),
                 microphone="DK-1234",
                 compression=0),
            dict(name="neural",
Example #18
def main(argv=None):
    import argparse
    from .core import __version__

    p = argparse.ArgumentParser(prog="arfx-split", description=__doc__)
    p.add_argument('--version', action='version',
                   version='%(prog)s ' + __version__)
    p.add_argument('-v', help='verbose output', action='store_true', dest='verbose')

    p.add_argument("--duration", "-T", help="the maximum duration of entries "
                   "(default: %(default).2f seconds)", type=float, default=600)
    p.add_argument("--compress", "-z", help="set compression level in output file "
                   "(default: %(default)d)", type=int, default=1)
    p.add_argument("--dry-run", "-n", help="don't actually create the target file or copy data",
                   action="store_true")
    p.add_argument("--append", "-a", help="if true, will append data from src to tgt (default "
                   "is to overwrite). Note that log files are NOT merged in this mode",
                   action="store_true")
    p.add_argument("src", help="the ARF files to chunk up", nargs="+")
    p.add_argument("tgt", help="the destination ARF file")

    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)  # change
    ch.setFormatter(formatter)
    log.addHandler(ch)

    # open all input files and sort entries by timestamp
    log.info("sorting source file entries by timestamp")
    srcs = [h5.File(fname, "r") for fname in args.src]
    entries = sorted(itertools.chain.from_iterable(entry_timestamps(fp) for fp in srcs),
                     key=operator.itemgetter(1))
    if args.verbose:
        log.debug("entry order:")
        for entry, timestamp in entries:
            log.debug("  %s%s (time=%s)", os.path.basename(entry.file.filename), entry.name, timestamp)

    # open output file
    if not args.dry_run:
        if args.append:
            tgt_file = arf.open_file(args.tgt, mode="a")
            log.info("appending to destination file: %s", tgt_file.filename)
            log.info("  counting entries...")
            tgt_entry_index = arf.count_children(tgt_file, h5.Group)
        else:
            tgt_file = arf.open_file(args.tgt, mode="w")
            log.info("created destination file: %s", tgt_file.filename)
            jilllog = merge_jill_logs(srcs)
            if jilllog is not None:
                tgt_file.create_dataset("jill_log", data=jilllog, compression=args.compress)
                log.info("merged jill_log datasets")
            tgt_entry_index = 0
    else:
        # no file is created in a dry run, but the entry counter is still used below
        tgt_entry_index = 0

    # iterate through source entries, then chunk up datasets
    for entry, timestamp in entries:
        log.info("source entry: %s%s", os.path.basename(entry.file.filename), entry.name)
        max_duration = entry_duration(entry)
        n_chunks = int(max_duration // args.duration) + 1
        log.debug("  max duration: %3.2f s (chunks=%d)", max_duration, n_chunks)
        for i in range(n_chunks):
            tgt_entry_name = "entry_%05d" % tgt_entry_index
            tgt_timestamp = timestamp + datetime.timedelta(seconds=args.duration) * i
            # create target entry
            log.info("  target entry: %s (time=%s)", tgt_entry_name, tgt_timestamp)
            tgt_entry_index += 1
            # set target entry attributes
            if not args.dry_run:
                tgt_entry = arf.create_entry(tgt_file, tgt_entry_name, tgt_timestamp)
                for k, v in entry.attrs.items():
                    if k == "timestamp":
                        continue
                    elif k == "uuid":
                        k = "origin-uuid"
                    tgt_entry.attrs[k] = v
                tgt_entry.attrs["origin-file"] = os.path.basename(entry.file.filename)
                tgt_entry.attrs["origin-entry"] = os.path.basename(entry.name)
            for dset_name, dset in entry.items():
                if not arf.is_time_series(dset):
                    log.debug("    %s: (not sampled)", dset_name)
                    continue
                sampling_rate = dset.attrs['sampling_rate']
                chunk_size = int(args.duration * sampling_rate)
                start = chunk_size * i
                stop = min(start + chunk_size, dset.shape[0])
                data = dset[start:stop]
                log.debug("    %s: [%d:%d]", dset_name, start, stop)
                if not args.dry_run:
                    tgt_attrs = dict(dset.attrs)
                    try:
                        tgt_attrs['origin-uuid'] = tgt_attrs.pop('uuid')
                    except KeyError:
                        pass
                    arf.create_dataset(tgt_entry, dset_name, data, compression=args.compress,
                                       **tgt_attrs)