def insertGroup(self, parent):
    self.beginInsertRows(parent, 0, 0)
    parentNode = parent.internalPointer()
    if parentNode.type() == "Dataset":
        return False
    parentEntry = self.getEntry(parentNode)
    name = "%s/new" % parentEntry.name
    k = 1
    while name.split('/')[-1] in parentEntry.keys():
        name = "%s/new(%d)" % (parentEntry.name, k)
        k += 1
    try:
        tstamp = subprocess.check_output(["date", "+%s"])
        arf.create_entry(parentEntry, name, tstamp)
    except Exception:
        return False
    try:
        new_node = TreeNode(name, 'Group')
        parentNode.addChild(new_node)
    except Exception:
        # back out the entry created above if the tree node can't be added
        del parentEntry[name]
        return False
    self.endInsertRows()
    index = self.index(new_node.row(), 0, parent)
    return index
def test06_creation_iter():
    fp = arf.open_file("test06", mode="a", driver="core", backing_store=False)
    entry_names = ('z', 'y', 'a', 'q', 'zzyfij')
    for name in entry_names:
        g = arf.create_entry(fp, name, 0)
        arf.create_dataset(g, "dset", (), sampling_rate=1)
    assert_sequence_equal(arf.keys_by_creation(fp), entry_names)
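# The examples in this listing all revolve around the same small set of calls
# from the arf package: arf.open_file, arf.create_entry, arf.create_dataset,
# and arf.append_data. Below is a minimal, hypothetical sketch of that pattern
# (not taken from any of the sources shown here); the file name "example.arf"
# and the entry/dataset names are placeholders.
import numpy as np
import arf

def _sketch_create_and_append():
    fp = arf.open_file("example.arf", mode="a")
    # timestamp can be a scalar (seconds since the epoch) or a (sec, usec)
    # pair; extra keyword arguments become attributes on the new entry
    entry = arf.create_entry(fp, "entry_0", 0, experimenter="me")
    # an empty, extensible dataset that can be grown later with append_data
    dset = arf.create_dataset(entry, "pcm_000",
                              data=np.zeros(0, dtype="int16"),
                              maxshape=(None,),
                              units="samples",
                              sampling_rate=20000,
                              datatype=arf.DataTypes.UNDEFINED)
    arf.append_data(dset, np.zeros(1024, dtype="int16"))
    fp.close()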
def copy(kfile, afile, datatypes=(0,)):
    """copies the contents of a .kwd hdf5 file to a .arf hdf5 file

    datatypes: one arf datatype code per channel; a single value is broadcast
    to every channel
    """
    # copy top level attributes
    for k, v in kfile.attrs.items():
        afile.attrs[k] = v
    # descend into /recordings
    recordings = kfile["recordings"]
    # recordings holds the "entries", which have names "0", "1", "2", etc.
    for kname, kentry in recordings.items():
        timestamp = 0  # TODO: determine correct timestamp
        e = arf.create_entry(afile, kname, timestamp)
        for k, v in kentry.attrs.items():
            e.attrs[k] = v
        kdata = kentry["data"]
        if len(datatypes) == 1:
            datatypes = datatypes * kdata.shape[1]
        else:
            assert len(datatypes) == kdata.shape[1]
        channel_bit_volts = kentry["application_data"].attrs["channel_bit_volts"]
        channel_sample_rates = kentry["application_data"].attrs["channel_sample_rates"]
        # kwik files are SxN datasets, while in arf it's N datasets of length S
        for i in range(kdata.shape[1]):
            dset = arf.create_dataset(e, name=str(i),
                                      data=np.array([], dtype=np.int16),
                                      maxshape=(kdata.shape[0],),
                                      sampling_rate=channel_sample_rates[i],
                                      units='samples',
                                      datatype=datatypes[i],
                                      compression=6)
            dset.attrs["bit_volts"] = channel_bit_volts[i]
            # copy one channel in BUFFERSIZE-sample slabs to bound memory use
            for j in range(int(kdata.shape[0] / BUFFERSIZE) + 1):
                index = j * BUFFERSIZE
                arf.append_data(dset, kdata[index:index + BUFFERSIZE, i])
def main(kwik_file, arf_file, spikes_file, stimlog=None, nlfp=0, pulsechan='',
         stimchannel='', probe=None, start_sample=0,
         autodetect_pulse_channel=False, verbose=True):
    if kwik_file is not None:
        spike_metadata(kwik_file, spikes_file)
    if autodetect_pulse_channel:
        # determine pulse channel
        pulsechan = stimalign.autopulse_dataset_name(arf_file)
    # traverse arf entries, count samples, add kwik data to arf format
    entries = [x for x in arf_file.values() if type(x) == h5py.Group]
    entries = sorted(entries, key=repr)
    keys = [k for k, x in arf_file.items() if type(x) == h5py.Group]
    keys = sorted(keys, key=repr)
    if stimlog:
        stim_sequence = jstim_log_sequence(stimlog)
        if len(stim_sequence) != len(entries):
            print("Warning! jstim log has {} entries, "
                  "arf file has {} entries"
                  .format(len(stim_sequence), len(entries)))
    else:
        stim_sequence = [None for e in entries]
    if probe:
        spikes_file['geometry'] = get_geometry(probe)
    # add spike times, waveforms, etc., creating spike entries in spikes_file
    stop_sample = start_sample
    for k, entry, stim_name in zip(keys, entries, stim_sequence):
        print(k)
        print(entry.name)
        print(entry.attrs['timestamp'])
        print(stim_name)
        # create entry in spike arf file
        spike_entry = arf.create_entry(spikes_file, k, entry.attrs['timestamp'])
        # write stimulus name as an attribute
        if stim_name is not None:
            spike_entry.attrs['stimulus'] = stim_name
        if pulsechan:
            find_and_write_pulse_time(arf_file, k, pulsechan, spike_entry, verbose)
        if stimchannel:
            spike_entry.copy(arf_file[k][stimchannel], "stim")
        if nlfp:
            add_lfp(spike_entry, arf_file[k], nlfp)
        start_sample = stop_sample  # update starting sample for next entry
        stop_sample = start_sample + dataset_length(entry)
        if kwik_file is not None:
            add_spikes(spike_entry, kwik_file, start_sample, stop_sample)
    print('Done!')
    return stop_sample
def copy(kfile, afile, datatypes=(0,)):
    """copies the contents of a .kwd hdf5 file to a .arf hdf5 file

    datatypes: one arf datatype code per channel; a single value is broadcast
    to every channel
    """
    # copy top level attributes
    for k, v in kfile.attrs.items():
        afile.attrs[k] = v
    # descend into /recordings
    recordings = kfile["recordings"]
    # recordings holds the "entries", which have names "0", "1", "2", etc.
    for kname, kentry in recordings.items():
        timestamp = 0  # TODO: determine correct timestamp
        e = arf.create_entry(afile, kname, timestamp)
        for k, v in kentry.attrs.items():
            e.attrs[k] = v
        kdata = kentry["data"]
        if len(datatypes) == 1:
            datatypes = datatypes * kdata.shape[1]
        else:
            assert len(datatypes) == kdata.shape[1]
        channel_bit_volts = kentry["application_data"].attrs["channel_bit_volts"]
        channel_sample_rates = kentry["application_data"].attrs["channel_sample_rates"]
        # kwik files are SxN datasets, while in arf it's N datasets of length S
        for i in range(kdata.shape[1]):
            dset = arf.create_dataset(e, name=str(i),
                                      data=np.ravel(kdata[:, i]),
                                      compression=6,
                                      sampling_rate=channel_sample_rates[i],
                                      units='samples',
                                      datatype=datatypes[i])
            dset.attrs["bit_volts"] = channel_bit_volts[i]
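# Note: the two copy() variants above transpose the same SxN kwik dataset into
# N arf datasets in different ways. The first creates an empty, extensible
# dataset per channel and grows it in BUFFERSIZE-sample slabs with
# arf.append_data, keeping memory bounded for long recordings; the second
# materializes each whole channel with np.ravel(kdata[:, i]) and writes it in
# a single create_dataset call, which is simpler but holds a full channel in
# memory at once.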
def parse_explog(explog, entry_attrs, datatype, split_sites=False,
                 compression=1, channels=None, dry_run=False):
    """Parses an explog file to figure out where all the data is stored, and
    when everything happened. Creates one or more arf files to hold the data,
    and stores data under the associated entry.

    datatype: specify the default type of data being recorded; types may be
              specified in the explog, and these have precedence
    channels: if not None, only store data from these entries

    Additional arguments are used to set attributes on the newly created
    entries.
    """
    # base for created files
    arfbase = os.path.splitext(explog)[0]
    # look up source pcmseq2 file by channel name
    files = {}
    # dict of stimuli indexed by samplecount
    stimuli = {}
    # dataset attributes
    dset_attrs = {}
    # set of all onset times
    entries = {}
    fileonset = nx.uint64(0)  # corresponds to C long long type
    lastonset = nx.uint64(0)
    pen = 0
    site = 0

    efp = open(explog, 'rU')
    for line_num, line in enumerate(efp):
        lstart = line[0:4]
        # control info
        if lstart == '%%%%':
            if line.rstrip().endswith('start'):
                fileonset = lastonset
            elif line.find('add') > -1:
                try:
                    fields = line.partition('add')[-1].split()
                    props = dict(f.split('=') for f in fields[1:])
                    if 'datatype' in props:
                        props['datatype'] = getattr(arf.DataTypes,
                                                    props['datatype'].upper())
                    dset_attrs[fields[0]] = props
                except (AttributeError, ValueError):
                    log.warn("L%d parse error: bad channel metadata: ignoring",
                             line_num)
        # file creation
        elif lstart == "FFFF":
            try:
                fname, base, action = _reg_create.search(line).groups()
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)
                continue
            if channels is not None and base not in channels:
                continue
            if action == 'created':
                ifname = os.path.join(os.path.dirname(explog), fname)
                try:
                    files[base] = io.open(ifname, mode='r')
                except Exception as e:
                    log.warn("error opening source file '%s'; "
                             "ARF files will be incomplete", ifname)
                    log.debug(e)
            else:
                # file was closed; remove from list
                files.pop(base, None)
        # new pen or new site
        elif lstart == "IIII":
            fields = line.split()
            if fields[-2] == 'pen':
                pen = fields[-1]
            elif fields[-2] == 'site':
                site = fields[-1]
        # trigger lines
        elif lstart == "TTTT":
            if line.find("TRIG_OFF") > 0 or line.find("SONG_OFF") > 0:
                continue
            try:
                chan, entry, onset = _reg_triggeron.search(line).groups()
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)
                continue
            if channels is not None and chan not in channels:
                continue
            try:
                ifp = files[chan]
            except KeyError:
                # user should already have been warned about missing data from
                # this file
                continue
            try:
                ifp.entry = int(entry)
            except ValueError:
                log.warn("L%d runtime error: unable to access %s/%d",
                         line_num, int(entry), chan)
                continue
            lastonset = nx.uint64(onset) + fileonset
            entry_name = "e%ld" % lastonset
            ofname = (target_file_template.format(arfbase, pen, site)
                      if split_sites else base)
            try:
                ofp = get_dest_arf(ofname, dry_run)
            except IOError:
                log.error("target file '%s' already exists; aborting", ofname)
                return -1
            log.debug("%s/%s -> %s/%s/%s", ifp.filename, entry,
                      ofp.filename, entry_name, chan)
            data = ifp.read()
            sampling_rate = ifp.sampling_rate
            if 'sampling_rate' in ofp.attrs:
                if ofp.attrs['sampling_rate'] != sampling_rate:
                    log.error("%s/%s sampling rate (%d) doesn't match target "
                              "file (%d).\nYou may be attempting to load data "
                              "from the wrong files!",
                              ifp.filename, entry, sampling_rate,
                              ofp.attrs['sampling_rate'])
                    return -1
            else:
                ofp.attrs['sampling_rate'] = sampling_rate
            if lastonset in entries:
                entry = ofp[entry_name]
            else:
                entry = arf.create_entry(
                    ofp, entry_name, ifp.timestamp,
                    sample_count=lastonset,
                    sampling_rate=sampling_rate,
                    entry_creator='org.meliza.arfx/arfxplog ' + core.__version__,
                    pen=pen, site=site,
                    **entry_attrs)
                entries[lastonset] = entry
            if chan in dset_attrs and 'datatype' in dset_attrs[chan]:
                chan_datatype = dset_attrs[chan]['datatype']
            else:
                chan_datatype = datatype
            dset = arf.create_dataset(entry, name=chan, data=data,
                                      datatype=chan_datatype,
                                      sampling_rate=sampling_rate,
                                      compression=compression,
                                      source_file=ifp.filename,
                                      source_entry=ifp.entry)
            # store duration of longest dataset; could also get this from the
            # TRIG_OFF line, but this is a bit simpler
            if data.size > entry.attrs.get('trial_off', 0):
                entry.attrs['trial_off'] = data.size
            arf.set_uuid(dset, get_uuid(pen, site, chan))
        # stimulus lines
        elif lstart == "QQQQ":
            try:
                rel, onset, stimname = _reg_stimulus.search(line).groups()
                lastonset = nx.uint64(onset) + fileonset
                if stimname.startswith('File='):
                    stimname = stimname[5:]
                stimuli[lastonset] = stimname
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)
    # done parsing file
    efp.close()
    match_stimuli(stimuli, entries, sampling_rate=sampling_rate)
def create_entry(name):
    g = arf.create_entry(fp, name, tstamp, **entry_attributes)
    assert_true(name in fp)
    assert_greater(arf.timestamp_to_float(g.attrs['timestamp']), 0)
    for k in entry_attributes:
        assert_true(k in g.attrs)
def test01_create_existing_entry():
    arf.create_entry(fp, entry_base % 0, tstamp, **entry_attributes)
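# The two tests above appear to rely on module-level fixtures (fp, tstamp,
# entry_base, entry_attributes) and nose-style asserts defined elsewhere in
# the test module; test01_create_existing_entry presumably exercises the error
# path for creating an entry whose name is already taken.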
def main(argv=None):
    import argparse
    from .core import __version__

    p = argparse.ArgumentParser(prog="arfx-split", description=__doc__)
    p.add_argument('--version', action='version',
                   version='%(prog)s ' + __version__)
    p.add_argument('-v', help='verbose output', action='store_true',
                   dest='verbose')
    p.add_argument("--duration", "-T",
                   help="the maximum duration of entries "
                        "(default: %(default).2f seconds)",
                   type=float, default=600)
    p.add_argument("--compress", "-z",
                   help="set compression level in output file "
                        "(default: %(default)d)",
                   type=int, default=1)
    p.add_argument("--dry-run", "-n",
                   help="don't actually create the target file or copy data",
                   action="store_true")
    p.add_argument("--append", "-a",
                   help="if true, will append data from src to tgt (default "
                        "is to overwrite). Note that log files are NOT merged "
                        "in this mode",
                   action="store_true")
    p.add_argument("src", help="the ARF files to chunk up", nargs="+")
    p.add_argument("tgt", help="the destination ARF file")
    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)
    ch.setFormatter(formatter)
    log.addHandler(ch)

    # open all input files and sort entries by timestamp
    log.info("sorting source file entries by timestamp")
    srcs = [h5.File(fname, "r") for fname in args.src]
    entries = sorted(itertools.chain.from_iterable(entry_timestamps(fp)
                                                   for fp in srcs),
                     key=operator.itemgetter(1))
    if args.verbose:
        log.debug("entry order:")
        for entry, timestamp in entries:
            log.debug("  %s%s (time=%s)",
                      os.path.basename(entry.file.filename),
                      entry.name, timestamp)

    # open output file
    if not args.dry_run:
        if args.append:
            tgt_file = arf.open_file(args.tgt, mode="a")
            log.info("appending to destination file: %s", tgt_file.filename)
            log.info("  counting entries...")
            tgt_entry_index = arf.count_children(tgt_file, h5.Group)
        else:
            tgt_file = arf.open_file(args.tgt, mode="w")
            log.info("created destination file: %s", tgt_file.filename)
            jilllog = merge_jill_logs(srcs)
            if jilllog is not None:
                tgt_file.create_dataset("jill_log", data=jilllog,
                                        compression=args.compress)
                log.info("merged jill_log datasets")
            tgt_entry_index = 0

    # iterate through source entries, then chunk up datasets
    for entry, timestamp in entries:
        log.info("source entry: %s%s",
                 os.path.basename(entry.file.filename), entry.name)
        max_duration = entry_duration(entry)
        n_chunks = int(max_duration // args.duration) + 1
        log.debug("  max duration: %3.2f s (chunks=%d)", max_duration, n_chunks)
        for i in range(n_chunks):
            tgt_entry_name = "entry_%05d" % tgt_entry_index
            tgt_timestamp = timestamp + datetime.timedelta(seconds=args.duration) * i
            # create target entry
            log.info("  target entry: %s (time=%s)", tgt_entry_name, tgt_timestamp)
            tgt_entry_index += 1
            # set target entry attributes
            if not args.dry_run:
                tgt_entry = arf.create_entry(tgt_file, tgt_entry_name, tgt_timestamp)
                for k, v in entry.attrs.items():
                    if k == "timestamp":
                        continue
                    elif k == "uuid":
                        k = "origin-uuid"
                    tgt_entry.attrs[k] = v
                tgt_entry.attrs["origin-file"] = os.path.basename(entry.file.filename)
                tgt_entry.attrs["origin-entry"] = os.path.basename(entry.name)
            for dset_name, dset in entry.items():
                if not arf.is_time_series(dset):
                    log.debug("  %s: (not sampled)", dset_name)
                    continue
                sampling_rate = dset.attrs['sampling_rate']
                chunk_size = int(args.duration * sampling_rate)
                start = chunk_size * i
                stop = min(start + chunk_size, dset.shape[0])
                data = dset[start:stop]
                log.debug("  %s: [%d:%d]", dset_name, start, stop)
                if not args.dry_run:
                    tgt_attrs = dict(dset.attrs)
                    try:
                        tgt_attrs['origin-uuid'] = tgt_attrs.pop('uuid')
                    except KeyError:
                        pass
                    arf.create_dataset(tgt_entry, dset_name, data,
                                       compression=args.compress,
                                       **tgt_attrs)