def bandPass(
    timeData: TimeData, cutoffLow: float, cutoffHigh: float, inplace: bool = True
) -> TimeData:
    """Bandpass butterworth filter for time data

    Parameters
    ----------
    timeData : TimeData
        timeData to filter
    cutoffLow : float
        Low cutoff frequency in Hz
    cutoffHigh : float
        High cutoff frequency in Hz
    inplace : bool, optional
        Whether to manipulate the data inplace

    Returns
    -------
    TimeData
        Filtered time data
    """
    if not inplace:
        timeData = timeData.copy()
    timeData.data = bandPassData(
        timeData.data, timeData.sampleFreq, cutoffLow, cutoffHigh
    )
    timeData.addComment(
        "Band pass filter applied with cutoffs {} Hz and {} Hz".format(
            cutoffLow, cutoffHigh
        )
    )
    return timeData
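# A minimal sketch of what a helper like bandPassData could do (assumptions for
# illustration only: a zero-phase Butterworth filter via scipy with order 4;
# the library's actual implementation and filter order may differ):
def bandPassDataSketch(data, sampleFreq: float, cutoffLow: float, cutoffHigh: float, order: int = 4):
    from scipy.signal import butter, sosfiltfilt

    # normalise the cutoffs by the Nyquist frequency
    nyquist = 0.5 * sampleFreq
    sos = butter(
        order, [cutoffLow / nyquist, cutoffHigh / nyquist], btype="bandpass", output="sos"
    )
    # filter forwards and backwards for zero phase distortion
    return {chan: sosfiltfilt(sos, arr) for chan, arr in data.items()}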
def polarityReversal(
    timeData: TimeData, reversal: Dict[str, bool], inplace: bool = True
) -> TimeData:
    """Multiply the data by -1 (polarity reversal)

    Parameters
    ----------
    timeData : TimeData
        timeData to reverse the polarity of
    reversal : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    inplace : bool, optional
        Whether to manipulate the data inplace

    Returns
    -------
    TimeData
        Time data with polarity reversed for the flagged channels
    """
    if not inplace:
        timeData = timeData.copy()
    timeData.data = polarityReversalData(timeData.data, reversal)
    timeData.addComment("Polarity reversal with parameters: {}".format(reversal))
    return timeData
def getData(self, iWindow: int) -> TimeData:
    """Returns time window data for local index

    Parameters
    ----------
    iWindow : int
        Local index of window

    Returns
    -------
    windowData : TimeData
        TimeData object with the window data
    """
    winSamples = self.winSamples[iWindow]
    winData = {}
    for c in self.chans:
        winData[c] = self.timeData.data[c][
            winSamples[0] : winSamples[1] + 1
        ]  # add 1 because numpy indexing like this is not inclusive
    globalWindow = self.winTimes[iWindow][0]
    winStartTime = self.winTimes[iWindow][1]
    winStopTime = self.winTimes[iWindow][2]
    return TimeData(
        sampleFreq=self.sampleFreq,
        startTime=winStartTime,
        stopTime=winStopTime,
        data=winData,
        comments=self.timeData.comments
        + ["Local window {}, global window {}".format(iWindow, globalWindow)],
    )
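# Worked example of the indexing above (illustrative numbers): for a window
# defined by winSamples = (256, 511) at sampleFreq = 128 Hz,
#     self.timeData.data[c][256:512]  -> 256 samples (numpy's stop index is
#                                        exclusive, hence the +1)
#     winStartTime = startTime + 256/128 s, winStopTime = winStartTime + 255/128 s
# so the stop time refers to the last sample in the window, not one past it.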
def interpolateToSecond(timeData: TimeData, inplace: bool = True) -> TimeData:
    """Interpolate data to be on the second

    Some formats of time data (e.g. SPAM) do not start on the second with their sampling. This method interpolates so that sampling starts on the second and improves interoperability with other recording formats.

    Parameters
    ----------
    timeData : TimeData
        Time data to interpolate onto the second
    inplace : bool, optional
        Whether to do the interpolation inplace or not. Default is True.

    Returns
    -------
    TimeData
        Time data interpolated to start on the second
    """
    startTimeInterp, numSamplesInterp, dataInterp = interpolateToSecondData(
        timeData.data, timeData.sampleFreq, timeData.startTime
    )
    if not inplace:
        timeData = timeData.copy()
    timeData.numSamples = numSamplesInterp
    timeData.startTime = startTimeInterp
    # calculate the new stop time
    timeData.stopTime = timeData.startTime + timedelta(
        seconds=(1.0 / timeData.sampleFreq) * (timeData.numSamples - 1)
    )
    timeData.data = dataInterp
    timeData.addComment(
        "Time data interpolated to nearest second. New start time {}, new end time {}, new number of samples {}".format(
            timeData.startTime, timeData.stopTime, timeData.numSamples
        )
    )
    return timeData
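# A hedged sketch of the idea behind interpolateToSecondData (assumption for
# illustration: plain linear interpolation with numpy; the real helper may use
# a higher-order interpolation scheme):
def interpolateToSecondDataSketch(data, sampleFreq: float, startTime):
    from datetime import timedelta

    import numpy as np

    # fraction of a second between the start time and the next whole second
    frac = ((1_000_000 - startTime.microsecond) % 1_000_000) / 1e6
    numSamples = next(iter(data.values())).size
    oldGrid = np.arange(numSamples)
    # the new grid starts frac seconds later, i.e. frac * sampleFreq samples
    # into the old grid, and must not extend beyond the last recorded sample
    newGrid = np.arange(frac * sampleFreq, numSamples - 1 + 1e-6)
    dataInterp = {chan: np.interp(newGrid, oldGrid, arr) for chan, arr in data.items()}
    newStartTime = startTime + timedelta(seconds=frac)
    return newStartTime, newGrid.size, dataInterp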
def resample(timeData: TimeData, resampFreq: float, inplace: bool = True) -> TimeData:
    """Resample time data

    Parameters
    ----------
    timeData : TimeData
        timeData to resample
    resampFreq : float
        The frequency to resample to
    inplace : bool, optional
        Whether to manipulate the data inplace

    Returns
    -------
    TimeData
        Resampled time data
    """
    origFreq = timeData.sampleFreq
    if not inplace:
        timeData = timeData.copy()
    timeData.data = resampleData(timeData.data, timeData.sampleFreq, resampFreq)
    # update the time info
    timeData.sampleFreq = resampFreq
    timeData.numSamples = timeData.data[timeData.chans[0]].size
    timeData.stopTime = timeData.startTime + timedelta(
        seconds=(1.0 / timeData.sampleFreq) * (timeData.numSamples - 1)
    )
    timeData.addComment(
        "Time data resampled from {:.6f} Hz to {:.6f} Hz".format(origFreq, resampFreq)
    )
    return timeData
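# One way a helper like resampleData might be implemented (an assumption for
# illustration: polyphase resampling with scipy, which low-pass filters while
# changing the rate; the library's actual method may differ):
def resampleDataSketch(data, sampleFreq: float, resampFreq: float):
    from fractions import Fraction

    from scipy.signal import resample_poly

    # express the frequency ratio as a rational up/down factor
    frac = Fraction(resampFreq / sampleFreq).limit_denominator(10000)
    return {
        chan: resample_poly(arr, frac.numerator, frac.denominator)
        for chan, arr in data.items()
    }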
def normalise(timeData: TimeData, inplace: bool = True) -> TimeData: """Normalise time data Parameters ---------- timeData : TimeData timeData to normalise inplace : bool, optional Whether to manipulate the data inplace Returns ------- TimeData Normalised time data """ if not inplace: timeData = timeData.copy() timeData.data = normaliseData(timeData.data) timeData.addComment("Data normalised") return timeData
def scale(
    timeData: TimeData, scalars: Dict[str, float], inplace: bool = True
) -> TimeData:
    """Scale the data by an arbitrary amount

    Parameters
    ----------
    timeData : TimeData
        timeData to scale
    scalars : Dict[str, float]
        Keys are channels and values are the scalars to multiply the channel data by
    inplace : bool, optional
        Whether to manipulate the data inplace

    Returns
    -------
    TimeData
        Scaled time data
    """
    if not inplace:
        timeData = timeData.copy()
    timeData.data = scaleData(timeData.data, scalars)
    timeData.addComment("Time data scaled with scalars: {}".format(scalars))
    return timeData
def notchFilter(timeData: TimeData, notch: float, inplace: bool = True) -> TimeData:
    """Notch filter for time data

    Parameters
    ----------
    timeData : TimeData
        timeData to filter
    notch : float
        Frequency to notch filter in Hz
    inplace : bool, optional
        Whether to manipulate the data inplace

    Returns
    -------
    TimeData
        Filtered time data
    """
    if not inplace:
        timeData = timeData.copy()
    timeData.data = notchFilterData(
        timeData.data, timeData.sampleFreq, notch, notch / 5.0
    )
    timeData.addComment("Notch filter applied at {} Hz".format(notch))
    return timeData
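# Passing notch / 5.0 as the bandwidth fixes the quality factor at Q = 5, so
# the rejected band scales with the notch frequency. A comparable filter with
# scipy (an illustrative sketch, not necessarily the library's implementation):
def notchFilterDataSketch(data, sampleFreq: float, notch: float, Q: float = 5.0):
    from scipy.signal import filtfilt, iirnotch

    # design a second-order IIR notch and apply it with zero phase shift
    b, a = iirnotch(notch, Q, fs=sampleFreq)
    return {chan: filtfilt(b, a, arr) for chan, arr in data.items()}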
def test_spectra_calculator_window():
    """Test Fourier transform with a window function applied and no detrending"""
    from resistics.common.format import datetimeFormat
    from resistics.time.data import TimeData
    from resistics.spectra.calculator import SpectrumCalculator
    import numpy as np
    from datetime import datetime

    # initialise some time data
    sampleFreq = 128
    startTime = "2020-01-01 00:00:00.000000"
    stopTime = "2020-01-01 00:00:00.062500"
    data = {}
    # test with impulse on zero and impulse shifted to give a phase
    data["Ex"] = np.array([1, 0, 0, 0, 0, 0, 0, 0])
    data["Hy"] = np.array([0, 1, 0, 0, 0, 0, 0, 0])
    timeData = TimeData(sampleFreq, startTime, stopTime, data)
    specCalc = SpectrumCalculator(128, 8)
    specCalc.detrend = False
    specCalc.applywindow = True
    specData = specCalc.calcFourierCoeff(timeData)
    assert np.absolute(specData.nyquist - 64) < 0.000001
    assert specData.windowSize == 8
    assert specData.dataSize == 5
    assert specData.numChans == 2
    assert sorted(specData.chans) == sorted(["Ex", "Hy"])
    assert specData.startTime == datetime.strptime(startTime, datetimeFormat(ns=True))
    assert specData.stopTime == datetime.strptime(stopTime, datetimeFormat(ns=True))
    np.testing.assert_array_almost_equal(specData.freqArray, [0, 16, 32, 48, 64])
    np.testing.assert_array_almost_equal(
        specData.data["Ex"],
        [1.0 + 0.0j, 1.0 + 0.0j, 1.0 + 0.0j, 1.0 + 0.0j, 1.0 + 0.0j],
    )
    np.testing.assert_array_almost_equal(
        specData.data["Hy"],
        [
            1 + 0j,
            0.707107 - 0.707107j,
            0 - 1j,
            -0.707107 - 0.707107j,
            -1 + 0j,
        ],
    )
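# The asserted values coincide with the plain DFT of the two impulses, which
# the shift theorem gives directly: an impulse at sample 0 transforms to 1 at
# every frequency, while an impulse delayed by one sample picks up the phase
# ramp exp(-2j * pi * k / 8). For k = 0..4 this gives 1, 0.707107 - 0.707107j,
# -1j, -0.707107 - 0.707107j and -1. A quick standalone cross-check:
#
#     >>> import numpy as np
#     >>> ramp = np.exp(-2j * np.pi * np.arange(5) / 8)
#     >>> np.allclose(np.fft.rfft([0, 1, 0, 0, 0, 0, 0, 0]), ramp)
#     True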
def fillGap(timeData1, timeData2):
    """Fill gap between time series

    Fill gaps between two different recordings. The intent is to fill the gap when recording has been interrupted and there are two data files. Both time series must have the same sampling frequency.

    Parameters
    ----------
    timeData1 : TimeData
        Time series data
    timeData2 : TimeData
        Time series data

    Returns
    -------
    TimeData
        Time series data with gap filled
    """
    if timeData1.sampleFreq != timeData2.sampleFreq:
        errorPrint(
            "fillGap",
            "fillGap requires both timeData objects to have the same sample rate",
            quitrun=True,
        )
        return False
    sampleFreq = timeData1.sampleFreq
    sampleRate = 1.0 / sampleFreq
    timeDataFirst = timeData1
    timeDataSecond = timeData2
    if timeData1.startTime > timeData2.stopTime:
        timeDataFirst = timeData2
        timeDataSecond = timeData1
    # now do a simple linear interpolation between timeDataFirst and timeDataSecond
    # recall, these times are inclusive, so only the samples strictly between the
    # two recordings are filled; this is mostly for clarity of programming
    gapStart = timeDataFirst.stopTime + timedelta(seconds=sampleRate)
    gapEnd = timeDataSecond.startTime - timedelta(seconds=sampleRate)
    # calculate number of samples in the gap
    numSamplesGap = (
        int(round((gapEnd - gapStart).total_seconds() * sampleFreq)) + 1
    )  # add 1 because inclusive
    # now interpolate
    newData = {}
    for chan in timeDataFirst.chans:
        startVal = timeDataFirst.data[chan][-1]
        endVal = timeDataSecond.data[chan][0]
        # numSamplesGap + 1 equal steps take startVal to endVal, with the fill
        # values placed on the steps in between
        increment = 1.0 * (endVal - startVal) / (numSamplesGap + 1)
        fillData = np.zeros(shape=(numSamplesGap), dtype=timeDataFirst.data[chan].dtype)
        for i in range(0, numSamplesGap):
            fillData[i] = startVal + (i + 1) * increment
        newData[chan] = np.concatenate(
            [timeDataFirst.data[chan], fillData, timeDataSecond.data[chan]]
        )
    # return a new time data object
    # deal with the comments
    comment = (
        ["-----------------------------", "TimeData1 comments"]
        + timeDataFirst.comments
        + ["-----------------------------", "TimeData2 comments"]
        + timeDataSecond.comments
    )
    comment += ["-----------------------------"] + [
        "Gap filled from {} to {}".format(gapStart, gapEnd)
    ]
    return TimeData(
        sampleFreq=sampleFreq,
        startTime=timeDataFirst.startTime,
        stopTime=timeDataSecond.stopTime,
        data=newData,
        comments=comment,
    )
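# The interpolation loop above is a linear ramp between the last sample before
# the gap and the first sample after it. For example, with startVal = 0.0,
# endVal = 1.0 and numSamplesGap = 3 the fill is [0.25, 0.5, 0.75], which
# numpy can also produce directly:
#
#     >>> import numpy as np
#     >>> np.linspace(0.0, 1.0, 3 + 2)[1:-1]
#     array([0.25, 0.5 , 0.75])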
def reformatHigh(self, path: str, **kwargs) -> None:
    """Write out high frequency time series in internal format

    Parameters
    ----------
    path : str
        Directory to write out the reformatted time series
    ts : List[int], optional
        A list of the high frequency ts files to reformat. By default, all of the higher frequency recordings are reformatted
    """
    writer = TimeWriterInternal()
    for idx, ts in enumerate(self.tsNums):
        if "ts" in kwargs and ts not in kwargs["ts"]:
            continue  # do not reformat this one
        # get the headers
        headers = self.getHeaders()
        chanHeaders, chanMap = self.getChanHeaders()
        chans = self.getChannels()
        # now go through the different ts files to get ready to output
        # skip the continuous recording, only the higher frequencies are reformatted here
        if ts == self.continuous:
            continue
        sampleFreq = self.tsSampleFreqs[idx]
        # set sample frequency in headers
        headers["sample_freq"] = sampleFreq
        for cH in chanHeaders:
            cH["sample_freq"] = sampleFreq
        # now open the data file
        dFile = open(self.dataF[idx], "rb")
        # each record has to be read separately and its start time compared to
        # the end of the previous record
        outStartTime = datetime.strptime(
            self.recordStarts[ts][0], "%Y-%m-%d %H:%M:%S.%f"
        )
        # set up the data dictionary
        data = {}
        for record, startDate in enumerate(self.recordStarts[ts]):
            # start date is a string
            startByte = self.recordBytes[ts][record]
            startDateTime = datetime.strptime(startDate, "%Y-%m-%d %H:%M:%S.%f")
            # read the record - numpy does not support 24 bit two's complement (3 bytes) - hence use struct
            bytesToRead = (
                self.recordScans[ts][record]
                * self.sampleByteSize
                * self.getNumChannels()
            )
            dFile.seek(startByte, 0)  # seek to start byte from start of file
            dataBytes = dFile.read(bytesToRead)
            dataRead = self.twosComplement(dataBytes)
            dataRecord = {}
            for chan in chans:
                # the chanIndex gives the right order in the data file
                # as it is the same order as in the header file
                chanIndex = self.chanMap[chan]
                dataRecord[chan] = dataRead[
                    chanIndex : self.recordScans[ts][record]
                    * self.getNumChannels() : self.getNumChannels()
                ]
            # need to compare to previous record
            if record != 0 and startDateTime != prevEndTime:
                # there is a gap, so write out the current data before saving the new data
                outStopTime = prevEndTime - timedelta(
                    seconds=1.0 / sampleFreq
                )  # because inclusive of first sample (previous end time is used for the continuity comparison)
                # calculate number of samples
                numSamples = data[chans[0]].size
                headers["start_date"] = outStartTime.strftime("%Y-%m-%d")
                headers["start_time"] = outStartTime.strftime("%H:%M:%S.%f")
                headers["stop_date"] = outStopTime.strftime("%Y-%m-%d")
                headers["stop_time"] = outStopTime.strftime("%H:%M:%S.%f")
                headers["num_samples"] = numSamples
                for cH in chanHeaders:
                    cH["start_date"] = headers["start_date"]
                    cH["start_time"] = headers["start_time"]
                    cH["stop_date"] = headers["stop_date"]
                    cH["stop_time"] = headers["stop_time"]
                    cH["num_samples"] = numSamples
                # get the outpath
                dataOutpath = os.path.join(
                    path,
                    "meas_ts{}_{}_{}".format(
                        ts,
                        outStartTime.strftime("%Y-%m-%d-%H-%M-%S"),
                        outStopTime.strftime("%Y-%m-%d-%H-%M-%S"),
                    ),
                )
                # create the timeData object
                comment = "Unscaled samples for interval {} to {} read in from measurement {}".format(
                    outStartTime, outStopTime, self.dataF[idx]
                )
                timeData = TimeData(
                    sampleFreq=self.getSampleFreq(),
                    startTime=outStartTime,
                    stopTime=outStopTime,
                    data=data,
                    comments=comment,
                )
                # write out
                writer.setOutPath(dataOutpath)
                writer.writeData(headers, chanHeaders, timeData)
                # then save current data
                outStartTime = startDateTime
                data = copy.deepcopy(dataRecord)
                prevEndTime = startDateTime + timedelta(
                    seconds=((1.0 / sampleFreq) * self.recordScans[ts][record])
                )
            else:
                # then record == 0 or startDateTime == prevEndTime (contiguous)
                # update prevEndTime
                prevEndTime = startDateTime + timedelta(
                    seconds=((1.0 / sampleFreq) * self.recordScans[ts][record])
                )
                if record == 0:
                    data = copy.deepcopy(dataRecord)
                    continue
                # otherwise, concatenate the contiguous data
                for chan in chans:
                    data[chan] = np.concatenate((data[chan], dataRecord[chan]))
        # close the data file
        dFile.close()
def getUnscaledSamples(self, **kwargs) -> TimeData: """Get raw data from data file Only returns the continuous data. The continuous data is in 24 bit two's complement (3 bytes) format and is read in using struct as this is not supported by numpy. Parameters ---------- chans : List[str], optional List of channels to return if not all are required startSample : int, optional First sample to return endSample : int, optional Last sample to return Returns ------- TimeData Time data object """ # initialise chans, startSample and endSample with the whole dataset options = self.parseGetDataKeywords(kwargs) # get the files to read and the samples to take from them, in the correct order recordsToRead, samplesToRead = self.getRecordsForSamples( options["startSample"], options["endSample"]) numSamples = options["endSample"] - options["startSample"] + 1 # set up the dictionary to hold the data data = {} for chan in options["chans"]: data[chan] = np.zeros(shape=(numSamples), dtype=self.dtype) # open the file dFile = open(self.continuousF, "rb") # loop through chans and get data sampleCounter = 0 for record, sToRead in zip(recordsToRead, samplesToRead): # number of samples to read in record dSamples = sToRead[1] - sToRead[0] + 1 # find the byte read start and byte read end recordByteStart = self.recordBytes[self.continuous][record] recordSampleStart = self.recordSampleStarts[ self.continuous][record] # find the offset on the readFrom bytes # now recall, each sample is recorded as a scan (all channels recorded at the same time) # so multiply by number of channels to get the number of bytes to read byteReadStart = (recordByteStart + (sToRead[0] - recordSampleStart) * self.sampleByteSize * self.getNumChannels()) bytesToRead = dSamples * self.sampleByteSize * self.getNumChannels( ) # read the data - numpy does not support 24 bit two's complement (3 bytes) - hence use struct dFile.seek(byteReadStart, 0) # seek to start byte from start of file dataBytes = dFile.read(bytesToRead) dataRead = self.twosComplement(dataBytes) # now need to unpack this for chan in options["chans"]: # check to make sure channel exists self.checkChan(chan) # get the channel index - the chanIndex should give the right order in the data file # as it is the same order as in the header file chanIndex = self.chanMap[chan] # now populate the channel data appropriately data[chan][sampleCounter:sampleCounter + dSamples] = dataRead[chanIndex:dSamples * self.getNumChannels():self. getNumChannels()] # increment sample counter sampleCounter = sampleCounter + dSamples # get ready for the next data read # close file dFile.close() # return data startTime, stopTime = self.sample2time(options["startSample"], options["endSample"]) comment = "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format( startTime, stopTime, self.dataPath, options["startSample"], options["endSample"], ) return TimeData( sampleFreq=self.getSampleFreq(), startTime=startTime, stopTime=stopTime, data=data, comments=comment, )
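# numpy has no native 24 bit integer type, so a helper like twosComplement has
# to widen each 3 byte sample to a standard type; the library uses struct for
# this. A hedged sketch of a vectorised numpy alternative (assumption for
# illustration: little endian sample bytes):
def twosComplementSketch(dataBytes: bytes):
    import numpy as np

    # view the buffer as rows of 3 bytes, one row per 24 bit sample
    raw = np.frombuffer(dataBytes, dtype=np.uint8).reshape(-1, 3)
    # assemble the 24 bit value from the three little endian bytes
    values = (
        raw[:, 0].astype(np.int32)
        | (raw[:, 1].astype(np.int32) << 8)
        | (raw[:, 2].astype(np.int32) << 16)
    )
    # sign extend: values with the 24th bit set are negative
    values[values >= (1 << 23)] -= 1 << 24
    return values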
def calibrate(
    self,
    timeData: TimeData,
    sensor: Dict[str, str],
    serial: Dict[str, int],
    chopper: Dict[str, bool],
) -> TimeData:
    """Calibrate time data

    For each channel in timeData, searches for a matching calibration file based on sensor type, serial number and chopper. If a calibration file is found, the channel is calibrated using the data in the file. If useTheoretical is False and no file is found, the data is not calibrated

    todo: If no calibration file is found and the channel is a magnetic data channel, a theoretical function can be used

    Parameters
    ----------
    timeData : TimeData
        TimeData object
    sensor : Dict[str, str]
        Dictionary of sensor information with channels as the key and sensor as the value (sensor is a string)
    serial : Dict[str, int]
        Dictionary of serial information with channels as the key and serial as the value (serial is a number)
    chopper : Dict[str, bool]
        Dictionary of chopper information with channels as the key and chopper as the value (chopper is a bool)

    Returns
    -------
    timeData : TimeData
        Calibrated TimeData object
    """
    calIO = CalibrationIO()
    # iterate over the channels
    for chan in timeData.chans:
        # output some info
        self.printText("Calibrating channel {}".format(chan))
        # try to find a calibration file
        calFile, calFormat = self.getCalFile(sensor[chan], serial[chan], chopper[chan])
        if calFile == "":
            # no file found
            if self.useTheoretical and isMagnetic(chan):
                # use theoretical
                calData = self.getTheoreticalCalData(sensor[chan])
                timeData.data[chan] = self.calibrateChan(
                    timeData.data[chan], timeData.sampleFreq, calData
                )
                timeData.addComment(
                    "Channel {} calibrated with theoretical calibration function".format(chan)
                )
                continue
            else:
                self.printText(
                    "No calibration data found - channel will not be calibrated"
                )
                timeData.addComment("Channel {} not calibrated".format(chan))
                continue  # nothing to do
        # else file found
        # no need to separately apply static gain, already included in cal data
        calIO.refresh(calFile, calFormat, chopper=chopper[chan], extend=self.extend)
        calData = calIO.read()
        self.printText(
            "Calibration file found for sensor {}, serial number {}, chopper {}: {}".format(
                sensor[chan], serial[chan], chopper[chan], calFile
            )
        )
        self.printText("Format: {}".format(calFormat))
        self.printText(
            "Static gain correction of {} applied to calibration data".format(
                calData.staticGain
            )
        )
        # calibrate time data
        timeData.data[chan] = self.calibrateChan(
            timeData.data[chan], timeData.sampleFreq, calData
        )
        timeData.addComment(
            "Channel {} calibrated with calibration data from file {}".format(chan, calFile)
        )
    # return calibrated time data
    return timeData
def getUnscaledSamples(self, **kwargs) -> TimeData: """Get raw data from data file Depending on the data format, this could be raw counts or in some physical unit. The method implemented in the base DataReader can read from ATS and internal files. SPAM and Phoenix data readers have their own implementations. The raw data units for ATS and internal data formats are as follows: - ATS data format has raw data in counts. - The raw data unit of the internal format is dependent on what happened to the data before writing it out in the internal format. If the channel header scaling_applied is set to True, no scaling happens in either getUnscaledSamples or getPhysicalSamples. However, if the channel header scaling_applied is set to False, the internal format data will be treated like ATS data, meaning raw data in counts. Parameters ---------- chans : List[str], optional List of channels to return if not all are required startSample : int, optional First sample to return endSample : int, optional Last sample to return Returns ------- TimeData Time data object """ # initialise chans, startSample and endSample with the whole dataset options = self.parseGetDataKeywords(kwargs) # get samples - this is inclusive dSamples = options["endSample"] - options["startSample"] + 1 # loop through chans and get data data = {} for chan in options["chans"]: # check to make sure channel exists self.checkChan(chan) # get data file dFile = os.path.join(self.dataPath, self.getChanDataFile(chan)) # get the data byteOff = self.dataByteOffset + options[ "startSample"] * self.dataByteSize # now check if lsb applied or not and read data as float32 or int32 accordingly if self.getChanScalingApplied(chan): data[chan] = np.memmap(dFile, dtype="float32", mode="r", offset=byteOff, shape=(dSamples)) else: data[chan] = np.memmap(dFile, dtype="int32", mode="r", offset=byteOff, shape=(dSamples)) # get data start and stop time startTime, stopTime = self.sample2time(options["startSample"], options["endSample"]) # dataset comments comments = [] comments.append( "Unscaled data {} to {} read in from measurement {}, samples {} to {}" .format( startTime, stopTime, self.dataPath, options["startSample"], options["endSample"], )) comments.append("Sampling frequency {}".format(self.getSampleFreq())) if len(self.comments) > 0: comments = self.comments + comments return TimeData( sampleFreq=self.getSampleFreq(), startTime=startTime, stopTime=stopTime, data=data, comments=comments, )
def getUnscaledSamples(self, **kwargs) -> TimeData: """Get raw data from data file, returned in mV SPAM raw data is single precision float with unit Volts. Calling this applies the ts_lsb calculated when the headers are read. This is because when a recording consists of multiple data files, each channel of each data file might have a different scaling. The only way to make the data consistent is to apply the ts_lsb scaling. Therefore, this method returns the data in mV for all channels. Parameters ---------- chans : List[str], optional List of channels to return if not all are required startSample : int, optional First sample to return endSample : int, optional Last sample to return Returns ------- TimeData Time data object """ # initialise chans, startSample and endSample with the whole dataset options = self.parseGetDataKeywords(kwargs) # get the files to read and the samples to take from them, in the correct order dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples( options["startSample"], options["endSample"]) numSamples = options["endSample"] - options["startSample"] + 1 # set up the dictionary to hold the data data = {} for chan in options["chans"]: data[chan] = np.zeros(shape=(numSamples), dtype=self.dtype) # loop through chans and get data sampleCounter = 0 for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead, scalings): # get samples - this is inclusive dSamples = sToRead[1] - sToRead[0] + 1 # spam files always record 5 channels dSamplesRead = dSamples * self.recChannels[dFile] # read the data byteOff = ( self.dataByteOffset[dFile] + sToRead[0] * self.recChannels[dFile] * self.dataByteSize) dFilePath = os.path.join(self.dataPath, dFile) dataRead = np.memmap( dFilePath, dtype=self.dtype, mode="r", offset=byteOff, shape=(dSamplesRead), ) # now need to unpack this for chan in options["chans"]: # check to make sure channel exists self.checkChan(chan) # get the channel index - the chanIndex should give the right order in the data file # as it is the same order as in the header file chanIndex = self.chanMap[chan] # use the range sampleCounter -> sampleCounter + dSamples, because this actually means sampleCounter + dSamples - 1 as python ranges are not inclusive of the end value # scale by the lsb scalar here - note that these can be different for each file in the run data[chan][sampleCounter:sampleCounter + dSamples] = ( dataRead[chanIndex:dSamplesRead:self.recChannels[dFile]] * scalar[chan]) # increment sample counter sampleCounter = sampleCounter + dSamples # get ready for the next data read # return data startTime, stopTime = self.sample2time(options["startSample"], options["endSample"]) comments = [] comments.append( "Unscaled data {} to {} read in from measurement {}, samples {} to {}" .format( startTime, stopTime, self.dataPath, options["startSample"], options["endSample"], )) comments.append("Data read from {} files in total".format( len(dataFilesToRead))) comments.append( "Data scaled to mV for all channels using scalings in header files" ) comments.append("Sampling frequency {}".format(self.getSampleFreq())) return TimeData( sampleFreq=self.getSampleFreq(), startTime=startTime, stopTime=stopTime, data=data, comments=comments, )
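# The strided slice dataRead[chanIndex : dSamplesRead : self.recChannels[dFile]]
# de-interleaves one channel from scan-ordered data, where a scan holds one
# sample for every channel. A worked example with 2 channels and 3 scans
# (illustrative values):
#
#     >>> import numpy as np
#     >>> dataRead = np.array([10, 20, 11, 21, 12, 22])  # Ex0 Ey0 Ex1 Ey1 Ex2 Ey2
#     >>> dataRead[0:6:2], dataRead[1:6:2]
#     (array([10, 11, 12]), array([20, 21, 22]))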
def write(self, headers: Dict, chanHeaders: List, chanMap: Dict, timeData: TimeData, **kwargs):
    """Write out the header file

    Parameters
    ----------
    headers : Dict
        Dictionary of headers
    chanHeaders : List
        List of channel headers
    chanMap : Dict
        Maps channel to index for chanHeaders
    timeData : TimeData
        Time series data as TimeData object
    """
    # set global headers from keyword arguments
    headers = self.setGlobalHeadersFromKeywords(headers, kwargs)
    # set channel headers from keyword arguments
    chanHeaders = self.setChanHeadersFromKeywords(chanHeaders, kwargs)
    # now overwrite the options by checking the TimeData object
    # for number of samples and sample frequency, the TimeData object is favoured
    chans = sorted(list(timeData.chans))
    dataSizes = []
    for c in chans:
        dataSizes.append(timeData.data[c].size)
    if min(dataSizes) != max(dataSizes):
        self.printWarning(
            "Channels do not have the same number of samples: {} - {}".format(
                ", ".join(chans), ", ".join([str(size) for size in dataSizes])
            )
        )
        self.printWarning("Only the smallest number of samples will be written out")
    numSamples = min(dataSizes)
    if headers["num_samples"] != numSamples:
        self.printWarning(
            "Number of samples {} in headers does not match number of samples in TimeData object {}. TimeData info will be used.".format(
                headers["num_samples"], numSamples
            )
        )
        headers["num_samples"] = numSamples
        timeData.numSamples = numSamples
    # sample freq
    if headers["sample_freq"] != timeData.sampleFreq:
        self.printWarning(
            "Sample frequency of {} Hz in headers does not match {} Hz in TimeData object".format(
                headers["sample_freq"], timeData.sampleFreq
            )
        )
        self.printWarning("Sample frequency in TimeData object will be used")
        headers["sample_freq"] = timeData.sampleFreq
    # deal with start and end time and create datetime objects
    # the start time does not change on resampling, only the end time
    datetimeStart = datetime.strptime(
        "{} {}".format(headers["start_date"], headers["start_time"]),
        "%Y-%m-%d %H:%M:%S.%f",
    )
    datetimeStop = datetime.strptime(
        "{} {}".format(headers["stop_date"], headers["stop_time"]),
        "%Y-%m-%d %H:%M:%S.%f",
    )
    # now compare to the time data
    if datetimeStart != timeData.startTime:
        self.printWarning(
            "Start time in headers {} does not match that in TimeData object {}. TimeData start time will be used".format(
                datetimeStart, timeData.startTime
            )
        )
        datetimeStart = timeData.startTime
    if datetimeStop != timeData.stopTime:
        self.printWarning(
            "Stop time in headers {} does not match that in TimeData object {}. TimeData stop time will be used".format(
                datetimeStop, timeData.stopTime
            )
        )
        datetimeStop = timeData.stopTime
    # now recalculate the stop time using the number of samples and compare again
    datetimeRecalc = self.calcStopDateTime(timeData.sampleFreq, numSamples, datetimeStart)
    if datetimeRecalc != datetimeStop:
        self.printWarning(
            "Note, discrepancy between stop time in given headers and that calculated from the data"
        )
        self.printWarning(
            "Causes of this might be resampling or interpolation processes and the limiting of data"
        )
        self.printWarning(
            "If no resampling, interpolation or limiting of data has been performed, please check all times"
        )
        self.printWarning(
            "Stop time {} calculated from the data will be used instead of that in the headers {}".format(
                datetimeRecalc, datetimeStop
            )
        )
        datetimeStop = datetimeRecalc
    headers["start_date"] = datetimeStart.strftime("%Y-%m-%d")
    headers["start_time"] = datetimeStart.strftime("%H:%M:%S.%f")
    headers["stop_date"] = datetimeStop.strftime("%Y-%m-%d")
    headers["stop_time"] = datetimeStop.strftime("%H:%M:%S.%f")
    # now update all the chan headers and limit data to numSamples
    for c in chans:
        timeData.data[c] = timeData.data[c][:numSamples]
        cIndex = chanMap[c]
        chanHeaders[cIndex]["num_samples"] = headers["num_samples"]
        chanHeaders[cIndex]["sample_freq"] = headers["sample_freq"]
        chanHeaders[cIndex]["start_date"] = headers["start_date"]
        chanHeaders[cIndex]["start_time"] = headers["start_time"]
        chanHeaders[cIndex]["stop_date"] = headers["stop_date"]
        chanHeaders[cIndex]["stop_time"] = headers["stop_time"]
    # finally, check the number of measurement channels
    headers["meas_channels"] = len(chans)
    # now write out the headers and save to class variables
    self.writeHeaders(headers, chans, chanMap, chanHeaders)
    self.headers = headers
    self.chans = chans
    self.chanMap = chanMap
    self.chanHeaders = chanHeaders
    # write out comment file
    self.writeComments(timeData.comments)
    # write out the data files
    self.writeDataFiles(chans, timeData)
def getUnscaledSamples(self, **kwargs) -> TimeData: """Get raw data from ascii data file This function simply reads the lines which match the samples to be read Parameters ---------- chans : List[str], optional List of channels to return if not all are required startSample : int, optional First sample to return endSample : int, optional Last sample to return Returns ------- TimeData Time data object """ # initialise chans, startSample and endSample with the whole dataset options = self.parseGetDataKeywords(kwargs) # get samples - this is inclusive dSamples = options["endSample"] - options["startSample"] + 1 # loop through chans and get data data = {} for chan in options["chans"]: # check to make sure channel exists self.checkChan(chan) # get data file dFile = os.path.join(self.dataPath, self.getChanDataFile(chan)) # read the lines dataChan = np.zeros(shape=(dSamples), dtype=np.float32) with open(dFile) as dF: for il, line in enumerate(dF): if il > options["endSample"]: break if il >= options["startSample"] and il <= options["endSample"]: dIndex = il - options["startSample"] dataChan[dIndex] = float(line.strip()) # set the data data[chan] = dataChan # get data start and stop time startTime, stopTime = self.sample2time( options["startSample"], options["endSample"] ) # dataset comments comments = [] comments.append( "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format( startTime, stopTime, self.dataPath, options["startSample"], options["endSample"], ) ) comments.append("Sampling frequency {}".format(self.getSampleFreq())) if len(self.comments) > 0: comments = self.comments + comments return TimeData( sampleFreq=self.getSampleFreq(), startTime=startTime, stopTime=stopTime, data=data, comments=comments, )
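# An equivalent, slightly more direct way to pull a sample range out of a text
# file with one sample per line is itertools.islice (a sketch under the same
# one-value-per-line assumption as above, not the reader's actual code):
def readAsciiRangeSketch(dFile: str, startSample: int, endSample: int):
    from itertools import islice

    import numpy as np

    with open(dFile) as f:
        # islice stops before the second argument, so add 1 to include endSample
        lines = islice(f, startSample, endSample + 1)
        return np.array([float(line.strip()) for line in lines], dtype=np.float32)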
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from data file, returned in mV

    Lemi B423 data always has five channels, in order Hx, Hy, Hz, Ex, Ey. The raw data is integer counts. Therefore, getting unscaled samples returns raw counts for the measurement. There are additional scalings which can be applied using the scale optional argument.

    Lemi B423 is recorded in multiple files. It has not been verified whether it is possible for each individual file to have different scaling.

    Without the scale option, the data is returned in:

    - Counts for both magnetic and electric channels (reading long integers)

    With the scaling option, the data is returned in:

    - microvolts for the electric channels
    - millivolts for the magnetic channels with the gain applied

    Applying the scaling does not appear to remove the internal gain of the Lemi. This will be removed when getting physical samples and the appropriate value must be set in the headers.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return
    scale : bool, optional
        Boolean flag for applying the gain scaling

    Returns
    -------
    TimeData
        Time data object
    """
    # initialise chans, startSample and endSample with the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    # get the files to read and the samples to take from them, in the correct order
    dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples(
        options["startSample"], options["endSample"]
    )
    numSamples = options["endSample"] - options["startSample"] + 1
    # set up the dictionary to hold the data
    dtype = np.float32 if options["scale"] else self.dtype
    data = {}
    for chan in options["chans"]:
        data[chan] = np.zeros(shape=(numSamples), dtype=dtype)
    # prepare comments
    startTime, stopTime = self.sample2time(
        options["startSample"], options["endSample"]
    )
    comments = []
    comments.append(
        "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime,
            stopTime,
            self.dataPath,
            options["startSample"],
            options["endSample"],
        )
    )
    comments.append("Sampling frequency {}".format(self.getSampleFreq()))
    comments.append("Data read from {} files in total".format(len(dataFilesToRead)))
    comments.append("Scaling = {}".format(options["scale"]))
    # loop through chans and get data
    sampleCounter = 0
    for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead, scalings):
        # calculate the starting byte and the number of bytes to read
        byteReadStart = self.dataByteOffset + sToRead[0] * self.recordByteSize
        dSamples = sToRead[1] - sToRead[0] + 1
        dSamplesRead = dSamples * self.getNumChannels()
        bytesToRead = dSamples * self.recordByteSize
        # read
        dFileHandle = open(dFile, "rb")
        dFileHandle.seek(byteReadStart, 0)  # seek to start byte from start of file
        dataBytes = dFileHandle.read(bytesToRead)
        dFileHandle.close()
        dataRead = self.readRecords(dataBytes, dSamples)
        # now need to unpack this
        for chan in options["chans"]:
            # check to make sure channel exists
            self.checkChan(chan)
            # get the channel index - the chanIndex should give the right order in the data file
            chanIndex = self.chanMap[chan]
            # use the range sampleCounter -> sampleCounter + dSamples, because this actually means sampleCounter + dSamples - 1 as python ranges are not inclusive of the end value
            data[chan][sampleCounter : sampleCounter + dSamples] = dataRead[
                chanIndex : dSamplesRead : self.getNumChannels()
            ]
            if options["scale"]:
                data[chan][sampleCounter : sampleCounter + dSamples] = (
                    data[chan][sampleCounter : sampleCounter + dSamples]
                    * scalar[chan][0]
                    + scalar[chan][1]
                )
                comments.append(
                    "Scaling channel {} of file {} with multiplier {} and adding {}".format(
                        chan, dFile, scalar[chan][0], scalar[chan][1]
                    )
                )
        # increment sample counter
        sampleCounter = sampleCounter + dSamples  # get ready for the next data read
    # return data
    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=comments,
    )
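# Each scalar[chan] above is a (multiplier, offset) pair, so the scaling is the
# affine map counts * multiplier + offset. For example (illustrative numbers
# only), a channel with multiplier 0.000382 and offset -0.5 would map raw
# counts of [1000, 2000] to [-0.118, 0.264].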