def reformatContinuous(self, path: str):
    """Write out the continuous time series in internal format

    The output directory name encodes the continuous ts number and the
    start/stop datetimes of the recording.

    Parameters
    ----------
    path : str
        Path to write out reformatted continuous recording
    """
    tsWriter = TimeWriterInternal()
    startStr = self.getStartDatetime().strftime("%Y-%m-%d-%H-%M-%S")
    stopStr = self.getStopDatetime().strftime("%Y-%m-%d-%H-%M-%S")
    measDir = "meas_ts{}_{}_{}".format(self.continuous, startStr, stopStr)
    tsWriter.setOutPath(os.path.join(path, measDir))
    # gather the recording headers and write out physical (scaled) samples
    headers = self.getHeaders()
    chanHeaders, chanMap = self.getChanHeaders()
    tsWriter.writeData(headers, chanHeaders, self.getPhysicalSamples(), physical=True)
# Tutorial fragment: create a site with a deliberate gap in its time data.
# NOTE(review): readerATS, proj, headers, chanHeaders and Path are defined
# earlier in the tutorial, outside this chunk - presumably readerATS is an
# ATS format time reader and proj a resistics project; confirm against the
# full tutorial source.
# Read two non-contiguous time windows (11:05-11:09 and 11:10-11:14), leaving
# a one minute gap between them
timeOriginal1 = readerATS.getPhysicalData("2012-02-10 11:05:00", "2012-02-10 11:09:00", remaverage=False)
timeOriginal2 = readerATS.getPhysicalData("2012-02-10 11:10:00", "2012-02-10 11:14:00", remaverage=False)
from resistics.time.writer_internal import TimeWriterInternal

# create a new site
proj.createSite("site1_gaps")
proj.refresh()
# write each window out as a separate measurement directory in the new site
writer = TimeWriterInternal()
writer.setOutPath(
    Path(proj.timePath, "site1_gaps", "meas_2012-02-10_11-05-00_section1"))
writer.writeData(headers, chanHeaders, timeOriginal1, physical=True)
writer.setOutPath(
    Path(proj.timePath, "site1_gaps", "meas_2012-02-10_11-05-00_section2"))
writer.writeData(headers, chanHeaders, timeOriginal2, physical=True)
from resistics.project.time import viewTime

# now view time
# plot the original site and the gapped site together over the full range,
# low pass filtered at 16 Hz, for channels Ex and Hy
# NOTE(review): this call is cut off at the chunk boundary - the closing
# parenthesis is outside this view
fig = viewTime(
    proj,
    "2012-02-10 11:05:00",
    "2012-02-10 11:14:00",
    sites=["site1", "site1_gaps"],
    filter={"lpfilt": 16},
    chans=["Ex", "Hy"],
    show=False,
# Tutorial fragment: band pass SPAM data, write it out in internal format and
# compare the round trip against the in-memory filtered data.
# NOTE(review): fig, plt, timeImages, physicalSPAMData, writer, timePath,
# spamReader and TimeReaderInternal come from earlier in the tutorial,
# outside this chunk.
fig.tight_layout(rect=[0, 0.02, 1, 0.96])
plt.show()
fig.savefig(timeImages / "spam_vs_internal.png")
# all we see is 50Hz and 16Hz noise - apply a band pass filter
from resistics.time.filter import bandPass

# band pass between 0.2 Hz and 16 Hz; inplace=False returns a new object
filteredSPAMData = bandPass(physicalSPAMData, 0.2, 16, inplace=False)
filteredSPAMData.printInfo()
# write out a filtered data - this is a subset of the data
spam_2filteredSubset = timePath / "spamInternalFiltered"
writer.setOutPath(spam_2filteredSubset)
chanHeaders, chanMap = spamReader.getChanHeaders()
writer.writeData(spamReader.getHeaders(), chanHeaders, filteredSPAMData, physical=True)
# let's try reading in again
internalReaderFiltered = TimeReaderInternal(spam_2filteredSubset)
internalReaderFiltered.printInfo()
internalReaderFiltered.printComments()
# get the internal formatted filtered data
filteredInternalData = internalReaderFiltered.getPhysicalSamples()
filteredInternalData.printInfo()
# plot this against the original
fig = plt.figure(figsize=(16, 3 * physicalSPAMData.numChans))
filteredSPAMData.view(fig=fig, sampleStop=5000, label="filtered SPAM format")
# NOTE(review): this call is cut off at the chunk boundary - remaining
# keyword arguments and the closing parenthesis are outside this view
filteredInternalData.view(fig=fig,
def reformatHigh(self, path: str, **kwargs) -> None:
    """Write out high frequency time series in internal format

    Records of each high frequency ts file are stitched together. Whenever
    the start time of a record does not match the end time of the previous
    record (i.e. a recording gap), the data accumulated so far is written
    out as its own measurement directory and a new section is begun.

    Parameters
    ----------
    path : str
        Directory to write out the reformatted time series
    ts : List[int], optional
        A list of the high frequency ts files to reformat. By default, all
        of the higher frequency recordings are reformatted
    """
    writer = TimeWriterInternal()
    for idx, ts in enumerate(self.tsNums):
        if "ts" in kwargs and ts not in kwargs["ts"]:
            continue  # do not reformat this one
        if ts == self.continuous:
            continue  # continuous data is handled by reformatContinuous
    # let's get the headers - fresh copies for each ts file as they are
    # mutated per section below
        headers = self.getHeaders()
        chanHeaders, chanMap = self.getChanHeaders()
        chans = self.getChannels()
        sampleFreq = self.tsSampleFreqs[idx]
        # set sample frequency in headers
        headers["sample_freq"] = sampleFreq
        for cH in chanHeaders:
            cH["sample_freq"] = sampleFreq
        # each record has to be read separately and then its start time is
        # compared to the end of the previous record to detect gaps
        outStartTime = datetime.strptime(self.recordStarts[ts][0], "%Y-%m-%d %H:%M:%S.%f")
        data = {}
        prevEndTime = None
        # context manager guarantees the data file is closed on exceptions too
        with open(self.dataF[idx], "rb") as dFile:
            for record, startDate in enumerate(self.recordStarts[ts]):
                # start date is a string
                startDateTime = datetime.strptime(startDate, "%Y-%m-%d %H:%M:%S.%f")
                scans = self.recordScans[ts][record]
                dataRecord = self._reformatHighReadRecord(
                    dFile, self.recordBytes[ts][record], scans, chans)
                if record != 0 and startDateTime != prevEndTime:
                    # gap found: write out the current data before saving the new data
                    # stop time is inclusive of the first sample, hence
                    # remove one sample period from the previous end time
                    outStopTime = prevEndTime - timedelta(seconds=1.0 / sampleFreq)
                    self._reformatHighWriteSection(
                        writer, path, ts, idx, headers, chanHeaders, chans,
                        data, sampleFreq, outStartTime, outStopTime)
                    # then start a new section with the current record
                    outStartTime = startDateTime
                    data = copy.deepcopy(dataRecord)
                elif record == 0:
                    data = copy.deepcopy(dataRecord)
                else:
                    # contiguous with previous record: concatenate the data
                    for chan in chans:
                        data[chan] = np.concatenate((data[chan], dataRecord[chan]))
                # expected start of the next record if there is no gap
                prevEndTime = startDateTime + timedelta(
                    seconds=((1.0 / sampleFreq) * scans))
        # write out the final section - previously the data accumulated after
        # the last gap (or the whole file when gap free) was never written
        if data:
            outStopTime = prevEndTime - timedelta(seconds=1.0 / sampleFreq)
            self._reformatHighWriteSection(
                writer, path, ts, idx, headers, chanHeaders, chans, data,
                sampleFreq, outStartTime, outStopTime)

def _reformatHighReadRecord(self, dFile, startByte: int, scans: int, chans):
    """Read one record from the open data file and demultiplex per channel

    Parameters
    ----------
    dFile :
        Open binary file object for the ts data file
    startByte : int
        Byte offset of the record from the start of the file
    scans : int
        Number of scans (samples per channel) in the record
    chans :
        Channels to extract

    Returns
    -------
    Dict
        Mapping from channel name to its samples for this record
    """
    bytesToRead = scans * self.sampleByteSize * self.getNumChannels()
    dFile.seek(startByte, 0)  # seek to start byte from start of file
    # read the record - numpy does not support 24 bit two's complement
    # (3 bytes) - hence use struct based twosComplement
    dataRead = self.twosComplement(dFile.read(bytesToRead))
    dataRecord = {}
    for chan in chans:
        # samples are interleaved channel by channel, in header file order
        chanIndex = self.chanMap[chan]
        dataRecord[chan] = dataRead[
            chanIndex:scans * self.getNumChannels():self.getNumChannels()]
    return dataRecord

def _reformatHighWriteSection(self, writer, path: str, ts: int, idx: int,
                              headers, chanHeaders, chans, data,
                              sampleFreq: float, outStartTime,
                              outStopTime) -> None:
    """Write out one gap-free section of a high frequency recording

    Updates the start/stop/num_samples entries of the shared headers in
    place, then writes the unscaled samples to a measurement directory named
    after the ts number and the section start/stop times.
    """
    numSamples = data[chans[0]].size
    headers["start_date"] = outStartTime.strftime("%Y-%m-%d")
    headers["start_time"] = outStartTime.strftime("%H:%M:%S.%f")
    headers["stop_date"] = outStopTime.strftime("%Y-%m-%d")
    headers["stop_time"] = outStopTime.strftime("%H:%M:%S.%f")
    headers["num_samples"] = numSamples
    for cH in chanHeaders:
        cH["start_date"] = headers["start_date"]
        cH["start_time"] = headers["start_time"]
        cH["stop_date"] = headers["stop_date"]
        cH["stop_time"] = headers["stop_time"]
        cH["num_samples"] = numSamples
    # get the outpath
    dataOutpath = os.path.join(
        path,
        "meas_ts{}_{}_{}".format(
            ts,
            outStartTime.strftime("%Y-%m-%d-%H-%M-%S"),
            outStopTime.strftime("%Y-%m-%d-%H-%M-%S"),
        ),
    )
    # create the timeData object
    comment = "Unscaled samples for interval {} to {} read in from measurement {}".format(
        outStartTime, outStopTime, self.dataF[idx])
    # NOTE(review): the original passed sampleFreq=self.getSampleFreq() here,
    # inconsistent with the ts-specific sample_freq written into the headers
    # above; use the section's own sample frequency
    timeData = TimeData(
        sampleFreq=sampleFreq,
        startTime=outStartTime,
        stopTime=outStopTime,
        data=data,
        comments=comment,
    )
    # write out - samples are unscaled, hence no physical=True
    writer.setOutPath(dataOutpath)
    writer.writeData(headers, chanHeaders, timeData)
def preProcess(projData: ProjectData, **kwargs) -> None:
    """Pre-process project time data

    Preprocess the time data using filters, notch filters, resampling or
    interpolation. A new measurement folder is created under the site. The
    name of the new measurement folder is:
    prepend_[name of input measurement]_postpend. By default, prepend is
    "proc" and postpend is empty. Processed time series data can be saved in
    a new site by using the outputsite option.

    Parameters
    ----------
    projData : ProjectData
        A project data object
    sites : str, List[str], optional
        Either a single site or a list of sites
    sampleFreqs : int, float, List[float], optional
        The frequencies to preprocess
    start : str, optional
        Start date of data to preprocess in format "%Y-%m-%d %H:%M:%S"
    stop : str, optional
        Stop date of data to process in format "%Y-%m-%d %H:%M:%S"
    outputsite : str, optional
        A site to output the preprocessed time data to. If this site does
        not exist, it will be created
    polreverse : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    scale : Dict[str, float]
        Keys are channels and values are floats to multiply the channel
        data by
    calibrate : bool, optional
        Boolean flag for calibrating the data. Default is false and setting
        to True will calibrate where files can be found.
    normalise : bool, optional
        Boolean flag for normalising the data. Default is False and setting
        to True will normalise each channel independently.
    filter : Dict, optional
        Filtering options in a dictionary
    notch : List[float], optional
        List of frequencies to notch in spectra given as a list of floats
    resamp : Dict, optional
        Resampling parameters in a dictionary with entries in the format:
        {sampleRateFrom: sampleRateTo}. All measurement directories of
        sampleRateFrom will be resampled to sampleRateTo
    interp : bool, optional
        Boolean flag for interpolating the data on to the second, so that
        sampling is coincident with seconds. This is not always the case.
        For example, SPAM data is not necessarily sampled on the second,
        whereas ATS data is. This function is useful when combining data of
        multiple formats. Interpolation does not change the sampling rate.
        Default is False.
    prepend : str, optional
        String to prepend to the output folder. Default is "proc".
    postpend : str, optional
        String to postpend to the output folder. Default is empty.
    """
    from resistics.project.shortcuts import getCalibrator
    from resistics.project.preprocess import (
        applyPolarisationReversalOptions,
        applyScaleOptions,
        applyCalibrationOptions,
        applyFilterOptions,
        applyInterpolationOptions,
        applyNormaliseOptions,
        applyNotchOptions,
        applyResampleOptions,
    )

    # default option values, overridden by any matching keyword arguments
    options: Dict = {}
    options["sites"]: List = projData.getSites()
    options["sampleFreqs"]: List[float] = projData.getSampleFreqs()
    options["start"]: Union[bool, str] = False
    options["stop"]: Union[bool, str] = False
    options["outputsite"]: str = ""
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = False
    options["normalise"]: bool = False
    options["filter"]: Dict = {}
    options["notch"]: List[float] = []
    options["resamp"]: Dict = {}
    options["interp"]: bool = False
    options["prepend"]: str = "proc"
    options["postpend"]: str = ""
    options = parseKeywords(options, kwargs)
    # print info
    text: List = ["Processing with options"]
    for op, val in options.items():
        text.append("\t{} = {}".format(op, val))
    projectBlock(text)

    # a single site name is normalised to a one-element list
    if isinstance(options["sites"], str):
        options["sites"] = [options["sites"]]

    # outputting to another site
    if options["outputsite"] != "":
        projectText("Preprocessed data will be saved to output site {}".format(
            options["outputsite"]))
        # create the site
        projData.createSite(options["outputsite"])
        projData.refresh()
        outputSitePath = projData.getSiteData(options["outputsite"]).timePath

    # output naming - ensure at least some prefix when writing back into the
    # same site, otherwise the output would clash with the input measurement
    outPre = options["prepend"] + "_" if options["prepend"] != "" else ""
    outPost = "_" + options["postpend"] if options["postpend"] != "" else ""
    if outPre == "" and outPost == "" and options["outputsite"] == "":
        outPre = "proc_"
    # create a data calibrator writer instance
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()
    writer = TimeWriterInternal()

    # format dates - convert the option strings into datetime objects
    if options["start"]:
        options["start"] = datetime.strptime(options["start"], "%Y-%m-%d %H:%M:%S")
    if options["stop"]:
        options["stop"] = datetime.strptime(options["stop"], "%Y-%m-%d %H:%M:%S")

    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()
        # loop over frequencies
        for sampleFreq in options["sampleFreqs"]:
            measurements = siteData.getMeasurements(sampleFreq)
            if len(measurements) == 0:
                # no data files at this sample rate
                continue
            # otherwise, process
            for meas in measurements:
                # get the reader
                projectText("Processing site {}, measurement {}".format(
                    site, meas))
                reader = siteData.getMeasurement(meas)
                startTime = reader.getStartDatetime()
                stopTime = reader.getStopDatetime()
                # skip measurements that fall outside the requested window
                if (options["start"] or options["stop"]) and not checkDateOptions(
                        options, startTime, stopTime):
                    continue
                # if the data contributes, copy in the data if relevant
                if options["start"]:
                    startTime = options["start"]
                if options["stop"]:
                    stopTime = options["stop"]
                # calculate the samples
                sampleStart, sampleEnd = reader.time2sample(startTime, stopTime)
                # now get the data
                timeData = reader.getPhysicalSamples(startSample=sampleStart,
                                                     endSample=sampleEnd)
                timeData.printInfo()
                headers = reader.getHeaders()
                chanHeaders, _ = reader.getChanHeaders()
                # apply options - each helper checks its own option flag and
                # modifies timeData in place where relevant
                applyPolarisationReversalOptions(options, timeData)
                applyScaleOptions(options, timeData)
                applyCalibrationOptions(options, cal, timeData, reader)
                applyFilterOptions(options, timeData)
                applyNotchOptions(options, timeData)
                applyInterpolationOptions(options, timeData)
                applyResampleOptions(options, timeData)
                applyNormaliseOptions(options, timeData)
                # output dataset path
                if options["outputsite"] != "":
                    timePath = outputSitePath
                else:
                    timePath = siteData.timePath
                outPath = os.path.join(timePath,
                                       "{}{}{}".format(outPre, meas, outPost))
                # write time data - need to manually change some headers
                # (hence the keywords), as processing may have changed the
                # time range, number of samples or sample frequency
                writer = TimeWriterInternal()
                writer.setOutPath(outPath)
                writer.writeData(
                    headers,
                    chanHeaders,
                    timeData,
                    start_time=timeData.startTime.strftime("%H:%M:%S.%f"),
                    start_date=timeData.startTime.strftime("%Y-%m-%d"),
                    stop_time=timeData.stopTime.strftime("%H:%M:%S.%f"),
                    stop_date=timeData.stopTime.strftime("%Y-%m-%d"),
                    numSamples=timeData.numSamples,
                    sample_freq=timeData.sampleFreq,
                    physical=True,
                )
                writer.printInfo()
# Tutorial fragment: interpolate SPAM data on to the second, write it out in
# internal format and read it back.
# NOTE(review): spamData, timePath and spamReader are defined earlier in the
# tutorial, outside this chunk.
from resistics.time.interp import interpolateToSecond

# inplace=False returns a new, interpolated time data object
interpData = interpolateToSecond(spamData, inplace=False)
interpData.printInfo()
# can now write out the interpolated dataset
from resistics.time.writer_internal import TimeWriterInternal

interpPath = timePath / "spamInterp"
headers = spamReader.getHeaders()
chanHeaders, chanMap = spamReader.getChanHeaders()
writer = TimeWriterInternal()
writer.setOutPath(interpPath)
writer.writeData(
    headers,
    chanHeaders,
    interpData,
    physical=True,
)
writer.printInfo()
# read in the internal data
from resistics.time.reader_internal import TimeReaderInternal

interpReader = TimeReaderInternal(interpPath)
interpReader.printInfo()
interpReader.printComments()
# get data between a time range
startTime = "2016-02-07 02:10:00"
stopTime = "2016-02-07 02:30:00"
# NOTE(review): this rebinds spamData from the original (non-interpolated)
# reader - presumably for comparison in a later, unseen part of the tutorial
spamData = spamReader.getPhysicalData(startTime, stopTime)