def _read_file(self, tdms_reader, read_metadata_only):
    tdms_reader.read_metadata()

    # Use object metadata to build group and channel objects
    group_properties = OrderedDict()
    group_channels = OrderedDict()
    object_properties = {
        path_string: self._convert_properties(obj.properties)
        for path_string, obj in tdms_reader.object_metadata.items()}
    try:
        self._properties = object_properties['/']
    except KeyError:
        pass

    for (path_string, obj) in tdms_reader.object_metadata.items():
        properties = object_properties[path_string]
        path = ObjectPath.from_string(path_string)
        if path.is_root:
            pass
        elif path.is_group:
            group_properties[path.group] = properties
        else:
            # Object is a channel
            try:
                channel_group_properties = object_properties[
                    path.group_path()]
            except KeyError:
                channel_group_properties = OrderedDict()
            channel = TdmsChannel(
                path, obj.data_type, obj.scaler_data_types,
                obj.num_values, properties, channel_group_properties,
                self._properties, tdms_reader, self._raw_timestamps,
                self._memmap_dir)
            if path.group in group_channels:
                group_channels[path.group].append(channel)
            else:
                group_channels[path.group] = [channel]

    # Create group objects containing channels and properties
    for group_name, properties in group_properties.items():
        try:
            channels = group_channels[group_name]
        except KeyError:
            channels = []
        group_path = ObjectPath(group_name)
        self._groups[group_name] = TdmsGroup(
            group_path, properties, channels)
    for group_name, channels in group_channels.items():
        if group_name not in self._groups:
            # Group with channels but without any corresponding
            # object metadata in the file:
            group_path = ObjectPath(group_name)
            self._groups[group_name] = TdmsGroup(group_path, {}, channels)

    if not read_metadata_only:
        self._read_data(tdms_reader)
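# Usage sketch (not part of the library): once _read_file has built the
# group and channel objects, they are reachable through the public
# accessors. The file name "example.tdms" is a hypothetical assumption.
from nptdms import TdmsFile

tdms_file = TdmsFile.read("example.tdms")
for group in tdms_file.groups():
    for channel in group.channels():
        print(channel.path, channel.properties)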
def _channels_to_dataframe(
        channels_to_export, time_index=False, absolute_time=False,
        scaled_data=True):
    import pandas as pd

    dataframe_dict = OrderedDict()
    for column_name, channel in channels_to_export.items():
        index = channel.time_track(absolute_time) if time_index else None
        if scaled_data:
            dataframe_dict[column_name] = pd.Series(
                data=_array_for_pd(channel[:]), index=index)
        elif channel.scaler_data_types:
            # Channel has DAQmx raw data
            raw_data = channel.read_data(scaled=False)
            for scale_id, scaler_data in raw_data.items():
                scaler_column_name = column_name + "[{0:d}]".format(scale_id)
                dataframe_dict[scaler_column_name] = pd.Series(
                    data=scaler_data, index=index)
        else:
            # Raw data for normal TDMS file
            raw_data = channel.read_data(scaled=False)
            dataframe_dict[column_name] = pd.Series(
                data=_array_for_pd(raw_data), index=index)
    return pd.DataFrame.from_dict(dataframe_dict)
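# Illustration of the scaler column naming above: with scaled_data=False,
# a DAQmx channel with multiple scalers gets one DataFrame column per
# scale id, suffixed "[n]". The path and scale ids below are hypothetical.
column_name = "/'group'/'voltage'"
for scale_id in (0, 1):
    print(column_name + "[{0:d}]".format(scale_id))
# Prints /'group'/'voltage'[0] then /'group'/'voltage'[1]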
def __init__(self, file, memmap_dir=None, read_metadata_only=False):
    """Initialise a new TDMS file object, reading all data.

    :param file: Either the path to the tdms file to read or an already
        opened file.
    :param memmap_dir: The directory to store memmapped data files in,
        or None to read data into memory. The data files are created
        as temporary files and are deleted when the channel data is no
        longer used. tempfile.gettempdir() can be used to get the default
        temporary file directory.
    :param read_metadata_only: If this parameter is enabled then only the
        metadata of the TDMS file will be read.
    """
    self.read_metadata_only = read_metadata_only
    self.segments = []
    self.objects = OrderedDict()
    self.memmap_dir = memmap_dir

    if hasattr(file, "read"):
        # Is a file
        self._read_segments(file)
    else:
        # Is path to a file
        with open(file, 'rb') as open_file:
            self._read_segments(open_file)
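# Usage sketch for this (older) constructor; "example.tdms" is a
# hypothetical file, and memmap_dir follows the docstring's suggestion.
import tempfile

from nptdms import TdmsFile

tdms_file = TdmsFile("example.tdms", memmap_dir=tempfile.gettempdir())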
def as_dataframe(self, absolute_time=False, scaled_data=True):
    """
    Converts the TDMS object to a DataFrame

    :param absolute_time: Whether times should be absolute rather than
        relative to the start time.
    :param scaled_data: By default the scaled data will be used.
        Set to False to use raw unscaled data.
    :return: The TDMS object data.
    :rtype: pandas.DataFrame
    """
    import pandas as pd

    def get_data(chan):
        if scaled_data:
            return chan.data
        else:
            return chan.raw_data

    # When absolute_time is True,
    # use the wf_start_time as offset for the time_track()
    try:
        time = self.time_track(absolute_time)
    except KeyError:
        time = None

    if self.channel is None:
        return pd.DataFrame.from_dict(
            OrderedDict(
                (ch.channel, pd.Series(get_data(ch)))
                for ch in self.tdms_file.group_channels(self.group)))
    else:
        return pd.DataFrame(
            get_data(self), index=time, columns=[self.path])
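# Usage sketch for the object-level conversion above, using the older
# TdmsFile.object() accessor; the file, group and channel names are
# hypothetical assumptions.
from nptdms import TdmsFile

tdms_file = TdmsFile("example.tdms")
df = tdms_file.object("group_1", "channel_1").as_dataframe(
    absolute_time=True)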
def __init__(self, tdms_file):
    """ Initialise a new TdmsReader

    :param tdms_file: Either the path to the tdms file to read
        as a string or pathlib.Path, or an already opened file.
    """
    self._segments = None
    self._prev_segment_objects = {}
    self.object_metadata = OrderedDict()
    self._file_path = None
    self._index_file_path = None

    self._segment_channel_offsets = None
    self._segment_chunk_sizes = None

    if hasattr(tdms_file, "read"):
        # Is a file
        self._file = tdms_file
    else:
        # Is path to a file
        self._file_path = str(tdms_file)
        self._file = open(self._file_path, 'rb')
        index_file_path = self._file_path + '_index'
        if os.path.isfile(index_file_path):
            self._index_file_path = index_file_path
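# Usage sketch for the low-level reader; the module path and file name
# are assumptions. TdmsFile wraps this class, so direct use is rarely
# needed.
from nptdms.reader import TdmsReader

reader = TdmsReader("example.tdms")
try:
    reader.read_metadata()
    print(list(reader.object_metadata.keys()))
finally:
    reader.close()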
def __init__(
        self, file, memmap_dir=None, read_metadata_only=False,
        keep_open=False):
    """Initialise a new TdmsFile object

    :param file: Either the path to the tdms file to read
        as a string or pathlib.Path, or an already opened file.
    :param memmap_dir: The directory to store memory mapped data files in,
        or None to read data into memory. The data files are created
        as temporary files and are deleted when the channel data is no
        longer used. tempfile.gettempdir() can be used to get the default
        temporary file directory.
    :param read_metadata_only: If this parameter is enabled then only the
        metadata of the TDMS file will be read.
    :param keep_open: Keeps the file open so data can be read if only
        metadata is read initially.
    """
    self._memmap_dir = memmap_dir
    self._groups = OrderedDict()
    self._properties = {}
    self._channel_data = {}
    self._reader = None
    self.data_read = False

    reader = TdmsReader(file)
    try:
        self._read_file(reader, read_metadata_only)
    finally:
        if keep_open:
            self._reader = reader
        else:
            reader.close()
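# Usage sketch of the keep_open pattern: nptdms exposes it through
# TdmsFile.open(), which reads only metadata up front and leaves the
# file open for on-demand reads. Group and channel names are
# hypothetical.
from nptdms import TdmsFile

with TdmsFile.open("example.tdms") as tdms_file:
    channel = tdms_file["group_1"]["channel_1"]
    data = channel[:]  # Data is read from the open file only here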
def _convert_properties(self, properties):
    def convert_prop(val):
        if isinstance(val, TdmsTimestamp) and not self._raw_timestamps:
            # Convert timestamps to numpy datetime64
            # if raw timestamps are not requested
            return val.as_datetime64()
        return val
    return OrderedDict(
        (k, convert_prop(v)) for (k, v) in properties.items())
def __init__(self, tdms_file, group, raw_data_chunk, channel_offsets):
    self.name = group.name
    self._channels = OrderedDict(
        (channel.name, ChannelDataChunk(
            tdms_file,
            channel,
            raw_data_chunk.channel_data.get(
                channel.path, RawChannelDataChunk.empty()),
            channel_offsets[channel.path]))
        for channel in group.channels())
def __init__(self, path, tdms_file=None):
    self.path = path
    self.tdms_file = tdms_file
    self._data = None
    self._data_scaled = None
    self.properties = OrderedDict()
    self.data_type = None
    self.dimension = 1
    self.number_values = 0
    self.has_data = False
    self._previous_segment_object = None
def from_group(group, time_index=False, absolute_time=False, scaled_data=True):
    """
    Converts a TDMS group object to a DataFrame.
    DataFrame columns are named using the channel names.

    :param group: Group object to convert.
    :param time_index: Whether to include a time index for the dataframe.
    :param absolute_time: If time_index is true, whether the time index
        values are absolute times or relative to the start time.
    :param scaled_data: By default the scaled data will be used.
        Set to False to use raw unscaled data.
    :return: The TDMS object data.
    :rtype: pandas.DataFrame
    """
    channels_to_export = OrderedDict(
        (ch.name, ch) for ch in group.channels())
    return _channels_to_dataframe(
        channels_to_export, time_index, absolute_time, scaled_data)
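# Usage sketch: in the public API this conversion is usually reached via
# TdmsGroup.as_dataframe(), which delegates to from_group. The file and
# group names are hypothetical.
from nptdms import TdmsFile

tdms_file = TdmsFile.read("example.tdms")
df = tdms_file["group_1"].as_dataframe(time_index=True)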
def as_dataframe(self, time_index=False, absolute_time=False):
    """
    Converts the TDMS file to a DataFrame

    :param time_index: Whether to include a time index for the dataframe.
    :param absolute_time: If time_index is true, whether the time index
        values are absolute times or relative to the start time.
    :return: The full TDMS file data.
    :rtype: pandas.DataFrame
    """
    import pandas as pd

    dataframe_dict = OrderedDict()
    for key, value in self.objects.items():
        if value.has_data:
            index = value.time_track(absolute_time) if time_index else None
            dataframe_dict[key] = pd.Series(data=value.data, index=index)
    return pd.DataFrame.from_dict(dataframe_dict)
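# Usage sketch for the (older) file-level conversion above; columns are
# keyed by object path. The file name is a hypothetical assumption.
from nptdms import TdmsFile

tdms_file = TdmsFile("example.tdms")
df = tdms_file.as_dataframe(time_index=True)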
def objects(self):
    """ (Deprecated) A dictionary of objects in the TDMS file,
    where the keys are the object paths.
    """
    _deprecated(
        "TdmsFile.objects",
        "Use TdmsFile.groups() to access all groups in the file, " +
        "and group.channels() to access all channels in a group.")
    objects = OrderedDict()
    root_path = ObjectPath()
    objects[str(root_path)] = RootObject(self._properties)

    for group in self.groups():
        objects[group.path] = group
        for channel in group.channels():
            objects[channel.path] = channel

    return objects
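# Usage sketch: the deprecated mapping is keyed by object path strings,
# "/" for the root object and "/'group'/'channel'" for channels. The
# file name is hypothetical.
from nptdms import TdmsFile

tdms_file = TdmsFile.read("example.tdms")
root_object = tdms_file.objects["/"]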
def groups(self):
    """Return the names of groups in the file

    Note that there is not necessarily a TDMS object associated
    with each group name.

    :rtype: List of strings.
    """
    # Split paths into components and take the first (group) component.
    object_paths = (path_components(path) for path in self.objects)
    group_names = (path[0] for path in object_paths if len(path) > 0)

    # Use an ordered dict as an ordered set to find unique
    # groups in order.
    groups_set = OrderedDict()
    for group in group_names:
        groups_set[group] = None
    return list(groups_set)
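# Usage sketch for the (older) groups() accessor, which returns group
# name strings in order of first appearance; the file name is
# hypothetical.
from nptdms import TdmsFile

tdms_file = TdmsFile("example.tdms")
for group_name in tdms_file.groups():
    print(group_name)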
def from_tdms_file(
        tdms_file, time_index=False, absolute_time=False, scaled_data=True):
    """
    Converts the TDMS file to a DataFrame.
    DataFrame columns are named using the TDMS object paths.

    :param tdms_file: TDMS file object to convert.
    :param time_index: Whether to include a time index for the dataframe.
    :param absolute_time: If time_index is true, whether the time index
        values are absolute times or relative to the start time.
    :param scaled_data: By default the scaled data will be used.
        Set to False to use raw unscaled data.
    :return: The full TDMS file data.
    :rtype: pandas.DataFrame
    """
    channels_to_export = OrderedDict()
    for group in tdms_file.groups():
        for channel in group.channels():
            channels_to_export[channel.path] = channel
    return _channels_to_dataframe(
        channels_to_export, time_index, absolute_time, scaled_data)
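# Usage sketch: in the current public API this conversion is usually
# reached via TdmsFile.as_dataframe(); the file name is a hypothetical
# assumption.
from nptdms import TdmsFile

tdms_file = TdmsFile.read("example.tdms")
df = tdms_file.as_dataframe(time_index=True, absolute_time=False)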
def __init__(self, f, tdms_file):
    """Read the lead in section of a segment"""
    self.tdms_file = tdms_file
    self.position = f.tell()
    self.num_chunks = 0
    self.endianness = "<"
    # A list of TdmsSegmentObject
    self.ordered_objects = []
    self.final_chunk_proportion = 1.0

    # First four bytes should be TDSm
    try:
        tag = f.read(4).decode('utf-8')
    except UnicodeDecodeError:
        raise ValueError("Segment does not start with TDSm")
    if tag == '':
        raise EOFError
    if tag != 'TDSm':
        raise ValueError(
            "Segment does not start with TDSm, but with %s" % tag)

    log.debug("Reading segment at %d", self.position)

    # Next four bytes are table of contents mask
    toc_mask = types.Int32.read(f)

    self.toc = OrderedDict()
    for prop_name, prop_mask in toc_properties.items():
        prop_is_set = (toc_mask & prop_mask) != 0
        self.toc[prop_name] = prop_is_set
        log.debug("Property %s is %s", prop_name, prop_is_set)
    if self.toc['kTocBigEndian']:
        self.endianness = '>'

    # Next four bytes are version number
    self.version = types.Int32.read(f, self.endianness)
    if self.version not in (4712, 4713):
        log.warning("Unrecognised version number.")

    # Now 8 bytes each for the offset values
    self.next_segment_offset = types.Uint64.read(f, self.endianness)
    self.raw_data_offset = types.Uint64.read(f, self.endianness)

    # Calculate data and next segment position
    lead_size = 7 * 4
    self.data_position = self.position + lead_size + self.raw_data_offset
    if self.next_segment_offset == 0xFFFFFFFFFFFFFFFF:
        # This can happen if LabVIEW crashes
        log.warning(
            "Last segment of file has unknown size, "
            "not attempting to read it")
        self.next_segment_pos = None
        self.next_segment_offset = None
        # Could try to read as much as possible but for now
        # don't attempt to read last segment
        raise EOFError
    else:
        self.next_segment_pos = (
            self.position + self.next_segment_offset + lead_size)
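# Standalone sketch of the TOC bitmask decoding above. The flag values
# follow NI's published TDMS format description and are repeated here
# for illustration only; the example mask is hypothetical.
toc_properties = {
    'kTocMetaData': 1 << 1,
    'kTocNewObjList': 1 << 2,
    'kTocRawData': 1 << 3,
    'kTocInterleavedData': 1 << 5,
    'kTocBigEndian': 1 << 6,
    'kTocDAQmxRawData': 1 << 7,
}

toc_mask = (1 << 1) | (1 << 3)  # Segment contains metadata and raw data
toc = {name: (toc_mask & mask) != 0 for name, mask in toc_properties.items()}
assert toc['kTocMetaData'] and toc['kTocRawData']
assert not toc['kTocBigEndian']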
def __init__(self):
    self.properties = OrderedDict()
    self.data_type = None
    self.scaler_data_types = None
    self.num_values = 0
def __init__(self, tdms_file, raw_data_chunk, channel_offsets):
    self._groups = OrderedDict(
        (group.name, GroupDataChunk(
            tdms_file, group, raw_data_chunk, channel_offsets))
        for group in tdms_file.groups())