def olsModel(A, y, **kwargs) -> Dict[str, Any]:
    r"""Solve a linear system by ordinary least squares

    Finds :math:`x` such that

    .. math::
        y = Ax

    in the least squares sense.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        When truthy, a column of ones is prepended to A so that a constant term is solved for

    Returns
    -------
    RegressionData
        RegressionData instance holding the (possibly augmented) predictors, the observations, the parameters and the residuals
    """
    from resistics.regression.data import RegressionData
    import numpy.linalg as linalg

    settings = parseKeywords(defaultOptions(), kwargs, printkw=False)
    design = A
    if settings["intercept"]:
        # the constant term is modelled by a leading column of ones
        onesColumn = np.ones(shape=(design.shape[0], 1), dtype="complex")
        design = np.hstack((onesColumn, design))
    # only the solution is needed, the remaining lstsq outputs are discarded
    coefficients = linalg.lstsq(design, y, rcond=None)[0]
    residuals = y - np.dot(design, coefficients)
    return RegressionData(design, y, params=coefficients, resids=residuals)
def plotOptionsTipper(**kwargs) -> Dict:
    """Get default plot options for plotting tipper data

    The standard plot options are extended with tipper specific entries and then overridden by any keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when plot is shown
    length_ylim : List[float], optional
        y limits for length data
    angle_ylim : List[float], optional
        y limits for angle data
    xlim : List[float], optional
        x limits for the plot

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting tipper data
    """
    opts = plotOptionsStandard()
    opts.update(
        {
            "figsize": (16, 5),
            "length_ylim": [0.001, 1000],
            "angle_ylim": [-30, 30],
            "xlim": [0.0001, 10000],
        }
    )
    return parseKeywords(opts, kwargs)
def plotOptionsTime(**kwargs) -> Dict:
    """Get default plot options for plotting time data

    The standard plot options are extended with empty y limits for electric and magnetic channels and then overridden by any keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when plot is shown
    Eylim : List[float], optional
        y limits for electric data
    Hylim : List[float], optional
        y limits for magnetic data

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting time data
    """
    opts = plotOptionsStandard()
    # empty lists mean no explicit y limits
    opts.update({"Eylim": [], "Hylim": []})
    return parseKeywords(opts, kwargs)
def getMaskData(projData: ProjectData, site: str, maskName: str, sampleFreq: Union[float, int], **kwargs) -> MaskData:
    """Read a mask for a site

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to get the mask
    maskName : str
        The name of the mask
    sampleFreq : int, float
        The sampling frequency for which the mask was created
    specdir : str, optional
        The spectra directory for which the mask was created. Defaults to the spectra directory in the project configuration.

    Returns
    -------
    MaskData
        The object returned by the mask reader for the requested mask
    """
    defaults = {"specdir": projData.config.configParams["Spectra"]["specdir"]}
    options = parseKeywords(defaults, kwargs)
    # masks are stored per spectra directory under the site
    maskPath = projData.getSiteData(site).getSpecdirMaskPath(options["specdir"])
    return MaskIO(maskPath).read(maskName, sampleFreq)
def plotOptionsTransferFunction(**kwargs) -> Dict:
    """Get default plot options for plotting transfer function data

    The standard plot options are extended with transfer function specific entries and then overridden by any keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size. The default is None.
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when plot is shown
    res_ylim : List[float], optional
        y limits for resistivity data
    phase_ylim : List[float], optional
        y limits for phase data
    xlim : List[float], optional
        x limits for transfer function data

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting transfer function data
    """
    opts = plotOptionsStandard()
    opts.update(
        {
            "figsize": None,
            "res_ylim": [0.01, 10000],
            "phase_ylim": [-20, 90],
            "xlim": [0.0001, 10000],
        }
    )
    return parseKeywords(opts, kwargs)
def getStatisticDataForSampleFreq(
    projData: ProjectData, site: str, sampleFreq: float, stat: str, declevel: int = 0, **kwargs
) -> Dict[str, StatisticData]:
    """Get the statistic data (for a particular decimation level) for all measurements in a site with sampling frequency sampleFreq

    Parameters
    ----------
    projData : ProjectData
        Project instance
    site : str
        The site for which to get the statistic data
    sampleFreq : float
        The sampling frequency
    stat : str
        The statistic for which to get the measurement
    declevel : int, optional
        The decimation level to read in. Default is 0.
    specdir : str, optional
        The spectra directory. Defaults to the spectra directory in the project configuration.

    Returns
    -------
    Dict[str, StatisticData]
        Statistic data keyed by measurement name. Measurements without data for the statistic are left out (a warning is issued for each).
    """
    from resistics.statistics.io import StatisticIO

    options = {}
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options = parseKeywords(options, kwargs)

    siteData = projData.getSiteData(site)
    if not siteData:
        projectError("Unable to find site {} in project".format(site), quitrun=True)

    # load the statistic data
    statData: Dict[str, StatisticData] = {}
    statIO = StatisticIO()
    for meas in siteData.getMeasurements(sampleFreq):
        statIO.setDatapath(
            os.path.join(siteData.getMeasurementStatPath(meas), options["specdir"])
        )
        # read once and reuse the result rather than reading the file a second time
        measStat = statIO.read(stat, declevel)
        if measStat is None:
            projectWarning(
                "No {} data found for site {} and measurement {}".format(
                    stat, site, meas
                )
            )
            continue
        statData[meas] = measStat
    return statData
def processProject(projData: ProjectData, **kwargs) -> None:
    """Process a project

    Loops over the requested sites and, for every sampling frequency of a site that is in the requested sampling frequencies, calls processSite with the full set of options.

    Parameters
    ----------
    projData : ProjectData
        The project data instance for the project
    sites : List[str], optional
        List of sites. Defaults to all project sites.
    sampleFreqs : List[float], optional
        List of sample frequencies to process. Defaults to all project sampling frequencies.
    specdir : str, optional
        The spectra directories to use
    inchans : List[str], optional
        Channels to use as the input of the linear system
    inputsite : str, optional
        Site from which to take the input channels. The default is to use input and output channels from the same site
    outchans : List[str], optional
        Channels to use as the output of the linear system
    remotesite : str, optional
        The site to use as the remote site
    remotechans : List[str], optional
        Channels to use from the remote reference site
    crosschannels : List[str], optional
        List of channels to use for cross powers
    masks : Dict, optional
        Masks dictionary for passing mask data. The key should be a site name and the value should either be a string for a single mask or a list of multiple masks.
    datetimes : List, optional
        List of datetime constraints, each one as a dictionary. For example [{"type": "datetime", "start": "2018-08-08 00:00:00", "end": "2018-08-08 16:00:00", "levels": [0,1]}]. Note that levels is optional.
    postpend : str, optional
        String to postpend to the transfer function output
    ncores : int, optional
        The number of cores to run the transfer function calculations on
    """
    # by default the remote channels are the very same list object as the input channels
    defaultInchans: List[str] = ["Hx", "Hy"]
    defaults: Dict = {
        "sites": projData.getSites(),
        "sampleFreqs": projData.getSampleFreqs(),
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "inchans": defaultInchans,
        "inputsite": "",
        "outchans": ["Ex", "Ey"],
        "remotesite": "",
        "remotechans": defaultInchans,
        "crosschannels": [],
        "masks": {},
        "datetimes": [],
        "postpend": "",
        "ncores": projData.config.getSolverCores(),
    }
    options = parseKeywords(defaults, kwargs)

    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        for sampleFreq in siteData.getSampleFreqs():
            # only process the sampling frequencies that were asked for
            if sampleFreq in options["sampleFreqs"]:
                processSite(projData, site, sampleFreq, **options)
def test_parseKeywords() -> None:
    """Check that parseKeywords overrides supplied keys and keeps the remaining defaults"""
    from resistics.common.checks import parseKeywords

    defaults = {"test1": 1, "test2": 2}
    overrides = {"test1": 3}
    result = parseKeywords(defaults, overrides)
    # overridden key takes the supplied value
    assert result["test1"] == 3
    # untouched key keeps its default
    assert result["test2"] == 2
def getTransferFunctionData(projData: ProjectData, site: str, sampleFreq: float, **kwargs) -> TransferFunctionData:
    """Get transfer function data

    Parameters
    ----------
    projData : ProjectData
        The project data
    site : str
        Site to get the transfer function data for
    sampleFreq : int, float
        The sampling frequency for which to get the transfer function data
    specdir : str, optional
        The spectra directories used
    postpend : str, optional
        The postpend on the transfer function files

    Returns
    -------
    TransferFunctionData
        The transfer function data read from file. If the transfer function file does not exist, a warning is issued and False is returned instead.
    """
    from resistics.transfunc.io import TransferFunctionReader

    defaults: Dict = {
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "postpend": "",
    }
    options = parseKeywords(defaults, kwargs)

    # a non-empty postpend is joined to the file name with an underscore
    suffix = options["postpend"]
    if suffix != "":
        suffix = "_{}".format(suffix)

    siteData = projData.getSiteData(site)
    freqStr = fileFormatSampleFreq(sampleFreq)
    tfFilename = "{}_fs{:s}_{}{}".format(site, freqStr, options["specdir"], suffix)
    path = os.path.join(siteData.transFuncPath, "{:s}".format(freqStr), tfFilename)

    if not checkFilepath(path):
        projectWarning("No transfer function file with name {}".format(path))
        return False

    projectText(
        "Reading transfer function for site {}, sample frequency {}, file {}".format(
            site, sampleFreq, path
        )
    )
    tfReader = TransferFunctionReader(path)
    tfReader.printInfo()
    return tfReader.tfData
def getSpecReader(projData: ProjectData, site: str, meas: str, **kwargs) -> Union[SpectrumReader, None]:
    """Get the spectrum reader for a measurement

    Parameters
    ----------
    projData : ProjectData
        The project data instance
    site : str
        Site for which to get the spectra reader
    meas : str
        The measurement
    declevel : int, optional
        Decimation level for which to get data. Default is 0.
    specdir : str, optional
        String that specifies spectra directory for the measurement

    Returns
    -------
    SpectrumReader
        The SpectrumReader object or None if the spectra file could not be opened for the decimation level
    """
    defaults = {
        "declevel": 0,
        "specdir": projData.config.configParams["Spectra"]["specdir"],
    }
    options = parseKeywords(defaults, kwargs)

    siteData = projData.getSiteData(site)
    if meas not in siteData.getMeasurements():
        projectError("Measurement directory {} not found".format(meas), quitrun=True)

    # create the spectrum reader
    specPath = os.path.join(siteData.getMeasurementSpecPath(meas), options["specdir"])
    specReader = SpectrumReader(specPath)
    specReader.printInfo()

    # open the spectra file for the current decimation level if it exists
    if specReader.openBinaryForReading("spectra", options["declevel"]):
        return specReader
    projectWarning("Spectra file does not exist at level {}".format(options["declevel"]))
    return None
def getStatisticData(
    projData: ProjectData, site: str, meas: str, stat: str, declevel: int = 0, **kwargs
) -> StatisticData:
    """Get the statistic data for a statistic for a site measurement

    Parameters
    ----------
    projData : ProjectData
        Project instance
    site : str
        The site for which to get the statistic data
    meas : str
        The measurement for which to get the statistic
    stat : str
        The statistic for which to get the measurement
    declevel : int, optional
        The decimation level to read in. Default is 0.
    specdir : str, optional
        The spectra directory. Defaults to the spectra directory in the project configuration.

    Returns
    -------
    StatisticData, None
        Whatever the statistic reader returns for the statistic and decimation level
    """
    from resistics.statistics.io import StatisticIO

    defaults = {"specdir": projData.config.configParams["Spectra"]["specdir"]}
    options = parseKeywords(defaults, kwargs)

    siteData = projData.getSiteData(site)
    if not siteData:
        projectError("Unable to find site {} in project".format(site), quitrun=True)

    # statistics are stored per measurement and spectra directory
    statPath = os.path.join(siteData.getMeasurementStatPath(meas), options["specdir"])
    statIO = StatisticIO()
    statIO.setDatapath(statPath)
    return statIO.read(stat, declevel)
def plotOptionsStandard(**kwargs) -> Dict:
    """Get a set of standard plot options

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size. The default is (20, 12).
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when plot is shown. The default is True.

    Returns
    -------
    Dict
        Dictionary of standard plot options
    """
    standard: Dict = {
        "figsize": (20, 12),
        "plotfonts": getViewFonts(),
        "block": True,
    }
    return parseKeywords(standard, kwargs)
def plotOptionsSpec(**kwargs) -> Dict:
    """Get default plot options for plotting spectra data

    The standard plot options are extended with an empty amplitude limit and then overridden by any keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when plot is shown
    amplim : List[float], optional
        Amplitude limits for plotting spectra

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting spectra data
    """
    opts = plotOptionsStandard()
    # an empty list means no explicit amplitude limits
    opts.update({"amplim": []})
    return parseKeywords(opts, kwargs)
def preProcess(projData: ProjectData, **kwargs) -> None:
    """Pre-process project time data

    Preprocess the time data using filters, notch filters, resampling or interpolation. A new measurement folder is created under the site. The name of the new measurement folder is:
    prepend_[name of input measurement]_postpend. By default, prepend is "proc" and postpend is empty.

    Processed time series data can be saved in a new site by using the outputsite option.

    Parameters
    ----------
    projData : ProjectData
        A project data object
    sites : str, List[str], optional
        Either a single site or a list of sites
    sampleFreqs : int, float, List[float], optional
        Either a single sampling frequency or a list of sampling frequencies to preprocess
    start : str, optional
        Start date of data to preprocess in format "%Y-%m-%d %H:%M:%S"
    stop : str, optional
        Stop date of data to process in format "%Y-%m-%d %H:%M:%S"
    outputsite : str, optional
        A site to output the preprocessed time data to. If this site does not exist, it will be created
    polreverse : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    scale : Dict[str, float]
        Keys are channels and values are floats to multiply the channel data by
    calibrate : bool, optional
        Boolean flag for calibrating the data. Default is false and setting to True will calibrate where files can be found.
    normalise : bool, optional
        Boolean flag for normalising the data. Default is False and setting to True will normalise each channel independently.
    filter : Dict, optional
        Filtering options in a dictionary
    notch : List[float], optional
        List of frequencies to notch in spectra given as a list of floats
    resamp : Dict, optional
        Resampling parameters in a dictionary with entries in the format: {sampleRateFrom: sampleRateTo}. All measurement directories of sampleRateFrom will be resampled to sampleRateTo
    interp : bool, optional
        Boolean flag for interpolating the data on to the second, so that sampling is coincident with seconds. This is not always the case. For example, SPAM data is not necessarily sampled on the second, whereas ATS data is. This function is useful when combining data of multiple formats. Interpolation does not change the sampling rate. Default is False.
    prepend : str, optional
        String to prepend to the output folder. Default is "proc".
    postpend : str, optional
        String to postpend to the output folder. Default is empty.
    """
    from resistics.project.shortcuts import getCalibrator
    from resistics.project.preprocess import (
        applyPolarisationReversalOptions,
        applyScaleOptions,
        applyCalibrationOptions,
        applyFilterOptions,
        applyInterpolationOptions,
        applyNormaliseOptions,
        applyNotchOptions,
        applyResampleOptions,
    )

    options: Dict = {}
    options["sites"]: List = projData.getSites()
    options["sampleFreqs"]: List[float] = projData.getSampleFreqs()
    options["start"]: Union[bool, str] = False
    options["stop"]: Union[bool, str] = False
    options["outputsite"]: str = ""
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = False
    options["normalise"]: bool = False
    options["filter"]: Dict = {}
    options["notch"]: List[float] = []
    options["resamp"]: Dict = {}
    options["interp"]: bool = False
    options["prepend"]: str = "proc"
    options["postpend"]: str = ""
    options = parseKeywords(options, kwargs)

    # print info
    text: List = ["Processing with options"]
    for op, val in options.items():
        text.append("\t{} = {}".format(op, val))
    projectBlock(text)

    # a single site or a single sampling frequency is allowed, wrap them so the loops below work
    if isinstance(options["sites"], str):
        options["sites"] = [options["sites"]]
    if isinstance(options["sampleFreqs"], (int, float)):
        options["sampleFreqs"] = [options["sampleFreqs"]]

    # outputting to another site
    if options["outputsite"] != "":
        projectText(
            "Preprocessed data will be saved to output site {}".format(
                options["outputsite"]
            )
        )
        # create the site
        projData.createSite(options["outputsite"])
        projData.refresh()
        outputSitePath = projData.getSiteData(options["outputsite"]).timePath

    # output naming
    outPre = options["prepend"] + "_" if options["prepend"] != "" else ""
    outPost = "_" + options["postpend"] if options["postpend"] != "" else ""
    if outPre == "" and outPost == "" and options["outputsite"] == "":
        # without any decoration the output would share the input measurement's folder name in the same site
        outPre = "proc_"

    # create a data calibrator instance
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()

    # format dates
    if options["start"]:
        options["start"] = datetime.strptime(options["start"], "%Y-%m-%d %H:%M:%S")
    if options["stop"]:
        options["stop"] = datetime.strptime(options["stop"], "%Y-%m-%d %H:%M:%S")

    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()
        # loop over frequencies
        for sampleFreq in options["sampleFreqs"]:
            measurements = siteData.getMeasurements(sampleFreq)
            if len(measurements) == 0:
                # no data files at this sample rate
                continue

            # otherwise, process
            for meas in measurements:
                # get the reader
                projectText("Processing site {}, measurement {}".format(site, meas))
                reader = siteData.getMeasurement(meas)
                startTime = reader.getStartDatetime()
                stopTime = reader.getStopDatetime()
                if (options["start"] or options["stop"]) and not checkDateOptions(
                    options, startTime, stopTime
                ):
                    continue
                # if the data contributes, copy in the data if relevant
                if options["start"]:
                    startTime = options["start"]
                if options["stop"]:
                    stopTime = options["stop"]

                # calculate the samples
                sampleStart, sampleEnd = reader.time2sample(startTime, stopTime)
                # now get the data
                timeData = reader.getPhysicalSamples(
                    startSample=sampleStart, endSample=sampleEnd
                )
                timeData.printInfo()
                headers = reader.getHeaders()
                chanHeaders, _ = reader.getChanHeaders()

                # apply options
                applyPolarisationReversalOptions(options, timeData)
                applyScaleOptions(options, timeData)
                applyCalibrationOptions(options, cal, timeData, reader)
                applyFilterOptions(options, timeData)
                applyNotchOptions(options, timeData)
                applyInterpolationOptions(options, timeData)
                applyResampleOptions(options, timeData)
                applyNormaliseOptions(options, timeData)

                # output dataset path
                if options["outputsite"] != "":
                    timePath = outputSitePath
                else:
                    timePath = siteData.timePath
                outPath = os.path.join(
                    timePath, "{}{}{}".format(outPre, meas, outPost)
                )

                # write time data - need to manually change some headers (hence the keywords)
                writer = TimeWriterInternal()
                writer.setOutPath(outPath)
                writer.writeData(
                    headers,
                    chanHeaders,
                    timeData,
                    start_time=timeData.startTime.strftime("%H:%M:%S.%f"),
                    start_date=timeData.startTime.strftime("%Y-%m-%d"),
                    stop_time=timeData.stopTime.strftime("%H:%M:%S.%f"),
                    stop_date=timeData.stopTime.strftime("%Y-%m-%d"),
                    numSamples=timeData.numSamples,
                    sample_freq=timeData.sampleFreq,
                    physical=True,
                )
                writer.printInfo()
def viewTime(projData: ProjectData, startDate: str, endDate: str, **kwargs) -> Union[Figure, None]:
    """View timeseries in the project

    Time data is collected for every requested site measurement that overlaps the date range, the preprocessing options are applied and all the data is plotted on a single figure with one subplot per channel.

    Parameters
    ----------
    projData : ProjectData
        The project data instance
    startDate : str
        The start of the date range to plot in the format "%Y-%m-%d %H:%M:%S"
    endDate : str
        The end of the date range to plot in the format "%Y-%m-%d %H:%M:%S"
    sites : List[str], optional
        List of sites
    sampleFreqs : List[float], optional
        List of sample frequencies to plot
    chans : List[str], optional
        List of channels to plot
    polreverse : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    scale : Dict[str, float]
        Keys are channels and values are floats to multiply the channel data by
    calibrate : bool, optional
        Boolean flag to calibrate data
    normalise : bool, optional
        Boolean flag to normalise the data. Default is False and setting to True will normalise each channel independently.
    notch : List[float], optional
        List of frequencies to notch out
    filter : Dict, optional
        Filter parameters
    show : bool, optional
        Boolean flag to show the plot
    save : bool, optional
        Boolean flag to save the plot to images folder
    plotoptions : Dict
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed.
    """
    from resistics.project.shortcuts import getCalibrator
    from resistics.project.preprocess import (
        applyPolarisationReversalOptions,
        applyScaleOptions,
        applyCalibrationOptions,
        applyFilterOptions,
        applyNormaliseOptions,
        applyNotchOptions,
    )
    from resistics.common.plot import savePlot, plotOptionsTime

    # defaults, overridden by any matching keyword arguments
    options = {}
    options["sites"]: List[str] = projData.sites
    options["sampleFreqs"]: Union[List[float], List[str]] = projData.getSampleFreqs()
    options["chans"]: List[str] = ["Ex", "Ey", "Hx", "Hy", "Hz"]
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = False
    options["normalise"]: bool = False
    options["filter"]: Dict = {}
    options["notch"]: List[float] = []
    options["show"]: bool = True
    options["save"]: bool = False
    options["plotoptions"]: Dict = plotOptionsTime()
    options = parseKeywords(options, kwargs)

    # prepare calibrator
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()

    # format startDate and endDate - ".000" is appended so the strings can be parsed with a fractional seconds field
    start = datetime.strptime("{}.000".format(startDate), "%Y-%m-%d %H:%M:%S.%f")
    stop = datetime.strptime("{}.000".format(endDate), "%Y-%m-%d %H:%M:%S.%f")
    # collect relevant data - dictionary to store timeData, keyed by site and then by measurement
    timeDataAll = {}
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        if isinstance(siteData, bool):
            # site does not exist
            continue
        siteData.printInfo()
        measurements = siteData.getMeasurements()
        timeDataAll[site] = {}

        # loop over measurements and save data for each one
        for meas in measurements:
            sampleFreq = siteData.getMeasurementSampleFreq(meas)
            if sampleFreq not in options["sampleFreqs"]:
                continue

            # check if data contributes to user defined time period
            siteStart = siteData.getMeasurementStart(meas)
            siteStop = siteData.getMeasurementEnd(meas)
            if siteStop < start or siteStart > stop:
                continue

            reader = siteData.getMeasurement(meas)
            # get the samples of the datetimes
            sampleStart, sampleStop = reader.time2sample(start, stop)
            # as the samples returned from time2sample are rounded use sample2time to get the appropriate start and end times for those samples
            readStart, readStop = reader.sample2time(sampleStart, sampleStop)
            # get the data for any available channels meaning even those sites with missing channels can be plotted
            timeData = reader.getPhysicalData(readStart, readStop)

            projectText(
                "Plotting measurement {} of site {} between {} and {}".format(
                    meas, site, readStart, readStop))

            # apply various options
            applyPolarisationReversalOptions(options, timeData)
            applyScaleOptions(options, timeData)
            applyCalibrationOptions(options, cal, timeData, reader)
            applyFilterOptions(options, timeData)
            applyNotchOptions(options, timeData)
            applyNormaliseOptions(options, timeData)
            timeDataAll[site][meas] = timeData

    # plot all the data - every measurement is drawn on the same figure
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    for site in timeDataAll:
        for meas in timeDataAll[site]:
            timeData = timeDataAll[site][meas]
            timeData.view(
                sampleStop=timeDataAll[site][meas].numSamples - 1,
                fig=fig,
                chans=options["chans"],
                label="{} - {}".format(site, meas),
                xlim=[start, stop],
                plotfonts=plotfonts,
            )

    # add the suptitle
    st = fig.suptitle(
        "Time data from {} to {}".format(start.strftime("%Y-%m-%d %H-%M-%S"),
                                         stop.strftime("%Y-%m-%d %H-%M-%S")),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # do the axis labels - one subplot per channel, in the order of the chans option
    numChans = len(options["chans"])
    for idx, chan in enumerate(options["chans"]):
        plt.subplot(numChans, 1, idx + 1)
        # do the yaxis
        if isElectric(chan):
            plt.ylabel("mV/km", fontsize=plotfonts["axisLabel"])
            if len(options["plotoptions"]["Eylim"]) > 0:
                plt.ylim(options["plotoptions"]["Eylim"])
        else:
            # non-electric channels are labelled nT when calibrated, mV otherwise
            if options["calibrate"]:
                plt.ylabel("nT", fontsize=plotfonts["axisLabel"])
            else:
                plt.ylabel("mV", fontsize=plotfonts["axisLabel"])
            if len(options["plotoptions"]["Hylim"]) > 0:
                plt.ylim(options["plotoptions"]["Hylim"])
        plt.legend(loc=1, fontsize=plotfonts["legend"])

    # plot format
    fig.tight_layout(rect=[0, 0.02, 1, 0.96])
    fig.subplots_adjust(top=0.92)

    # plot show and save
    if options["save"]:
        impath = projData.imagePath
        filename = "timeData_{}_{}".format(
            start.strftime("%Y-%m-%d_%H-%M-%S_"),
            stop.strftime("%Y-%m-%d_%H-%M-%S"))
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    # a figure that was only saved is closed and not handed back to the caller
    if not options["show"] and options["save"]:
        plt.close(fig)
        return None
    return fig
def mestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> Dict[str, Any]:
    r"""Mestimate robust least squares

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    Good method for dependent outliers (in :math:`y`). Not robust against independent outliers (leverage points)

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : Dict
        Initial model parameters and scale
    scale : optional
        A scale estimate
    intercept : bool, optional
        True or False for adding an intercept term
    weights : str, optional
        The weights to use
    maxiter : int, optional
        Maximum number of iteratively reweighted least squares iterations

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals, weights and scale
    """
    from resistics.common.math import eps
    from resistics.regression.moments import getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.data import RegressionData
    import numpy.linalg as linalg

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)

    # calculate the leverage from the diagonal of the projection matrix
    # NOTE: this is done on A before any intercept column is added
    n = A.shape[0]
    q, _r = linalg.qr(A)
    Pdiag = np.empty(shape=(n), dtype="float")
    for ii in range(0, n):
        Pdiag[ii] = np.absolute(np.sum(q[ii, :] * np.conjugate(q[ii, :]))).real
    Pdiag = Pdiag / np.max(Pdiag)
    leverageScale = getScale(Pdiag, "mad0")
    leverageWeights = getWeights(Pdiag / leverageScale, "huber")

    if options["intercept"] == True:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # see whether to do an initial OLS model or whether one is provided
    if options["initial"]:
        params, resids, scale = initialModel(options["initial"])
    else:
        soln = olsModel(A, y)
        # keep the OLS parameters so a solution exists even if no reweighting iteration is run
        params = soln.params
        resids = soln.resids
        scale = getScale(resids, "mad0")

    # if an initial scale was provided, it replaces the one calculated above
    if options["scale"]:
        scale = options["scale"]

    # standardised residuals and weights
    weights = getWeights(resids / scale, options["weights"]) * leverageWeights

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew, ynew, rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # an exact fit cannot be improved and would give a zero scale, so stop here
        if np.sum(np.absolute(residsNew)) < eps():
            return RegressionData(A, y, params=paramsNew, resids=residsNew, scale=scale, weights=weights)

        # standardise and calculate weights
        scale = getScale(residsNew, "mad0")
        weightsNew = getWeights(residsNew / scale, options["weights"]) * leverageWeights
        # increment iteration and save weightsNew
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        # use the R method of checking change in residuals (can check change in params)
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        # update residuals before deciding whether to stop
        resids = residsNew
        if changeResids < eps():
            break

    return RegressionData(A, y, params=params, resids=resids, scale=scale, weights=weights)
def chatterjeeMachler(A: np.ndarray, y: np.ndarray, **kwargs) -> Dict[str, Any]:
    r"""Robust bounded influence solver

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    Being a bounded influence operator, should be robust against both outliers in dependent and independent variables.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term
    maxiter : int, optional
        Maximum number of iteratively reweighted least squares iterations

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals and the weights used in the weighted least squares
    """
    from resistics.common.math import eps
    from resistics.regression.data import RegressionData
    import numpy.linalg as linalg

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    if options["intercept"] == True:
        # add column of ones for constant term
        onesColumn = np.ones(shape=(A.shape[0], 1), dtype="complex")
        A = np.hstack((onesColumn, A))

    # number of observations and regressors (after any intercept column)
    numObs = A.shape[0]
    numRegressors = A.shape[1]
    pnRatio = 1.0 * numRegressors / numObs

    # diagonal of the projection matrix from the QR decomposition, normalised by its maximum
    q, _r = linalg.qr(A)
    Pdiag = np.empty(shape=(numObs), dtype="float")
    for rowIdx, qRow in enumerate(q):
        Pdiag[rowIdx] = np.absolute(np.sum(qRow * np.conjugate(qRow))).real
    Pdiag = Pdiag / np.max(Pdiag)
    # numerator of the weights, constant over the iterations
    weightsNom = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration
    ratioFloor = np.ones(shape=(numObs), dtype="float") * pnRatio
    weights = np.reciprocal(np.maximum(Pdiag, ratioFloor))

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Aweighted, yweighted = applyWeights(A, y, weights)
        paramsNew = linalg.lstsq(Aweighted, yweighted, rcond=None)[0]
        residsNew = y - np.dot(A, paramsNew)
        residsAbs = np.absolute(residsNew)
        # vanishing residuals: return with the weights that produced this solution
        if np.sum(residsAbs) < eps():
            return RegressionData(A, y, params=paramsNew, resids=residsNew, weights=weights)

        # new weights, the denominator is bounded below by the median absolute residual
        medianFloor = np.ones(shape=(numObs), dtype="float") * np.median(residsAbs)
        weights = weightsNom / np.maximum(residsAbs, medianFloor)
        params = paramsNew
        iteration = iteration + 1

        # a change in residuals can only be measured from the second iteration onwards
        converged = False
        if iteration > 1:
            changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
            converged = changeResids < eps()
        # update resids
        resids = residsNew
        if converged:
            break

    return RegressionData(A, y, params=params, resids=resids, weights=weights)
def mmestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> Dict[str, Any]:
    r"""Two stage M estimate

    The two stage M estimate uses an initial mestimate with huber weights to give a measure of scale. A second M estimate is then performed using the calculated measure of scale. The second stage M estimate uses bisquare weights.

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : Dict
        Initial solution with parameters, scale and residuals. If scale is missing, it is calculated from the residuals. The dictionary passed in is not modified.
    scale : optional
        A scale estimate
    intercept : bool, optional
        True or False for adding an intercept term

    Returns
    -------
    RegressionData
        RegressionData instance returned by the second stage M estimate
    """
    from resistics.regression.moments import getScale

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    intercept = options["intercept"]

    if "initial" not in kwargs:
        # no initial solution, the huber stage calculates one and supplies the scale for the second stage
        soln1 = mestimateModel(A, y, weights="huber", intercept=intercept)
        return mestimateModel(A, y, weights="bisquare", scale=soln1.scale, intercept=intercept)

    # an initial solution is provided - work on a shallow copy so the caller's dictionary is left untouched
    initial = dict(kwargs["initial"])
    if "scale" not in initial:
        initial["scale"] = getScale(initial["resids"], "mad0")
    soln1 = mestimateModel(A, y, weights="huber", initial=initial, intercept=intercept)
    # update the scale in the initial solution and do another mestimate with a different weighting function
    initial["scale"] = soln1.scale
    return mestimateModel(A, y, weights="bisquare", initial=initial, intercept=intercept)
def viewStatisticDensityplot(
    projData: ProjectData,
    site: str,
    sampleFreq: Union[int, float],
    stat: str,
    crossplots: List[List[str]],
    **kwargs
) -> Union[Figure, None]:
    """Plot 2-D density histograms of statistic component pairs

    For one site and sampling frequency, the values of a statistic at a single
    evaluation frequency are gathered from every measurement and each requested
    pair of statistic components is shown as a two dimensional histogram.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    crossplots : List[List[str]]
        The statistic element pairs to crossplot
    declevel : int, optional
        The decimation level to plot
    eFreqI : int, optional
        The evaluation frequency index
    specdir : str, optional
        The spectra directory
    maskname : str, optional
        Mask name
    xlim : List, optional
        Limits for the x axis. When empty, the 2nd and 98th percentiles of the data are used.
    ylim : List, optional
        Limits for the y axis. When empty, the 2nd and 98th percentiles of the data are used.
    maxcols : int, optional
        The maximum number of columns in the plots
    show : bool, optional
        Show the plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. If no data was found, None.
    """
    from resistics.common.plot import (
        savePlot,
        plotOptionsSpec,
        getPlotRowsAndCols,
        colorbar2dSpectra,
    )

    options = {
        "declevel": 0,
        "eFreqI": 0,
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "maskname": "",
        "xlim": [],
        "ylim": [],
        "maxcols": 2,
        "show": True,
        "save": False,
        "plotoptions": plotOptionsSpec(),
    }
    options = parseKeywords(options, kwargs)
    declevel = options["declevel"]
    eFreqI = options["eFreqI"]

    projectText(
        "Plotting density plot for statistic {}, site {} and sampling frequency {}".format(
            stat, site, sampleFreq
        )
    )

    statData = getStatisticDataForSampleFreq(
        projData, site, sampleFreq, stat, declevel=declevel, specdir=options["specdir"]
    )
    statMeas = list(statData.keys())
    if len(statMeas) == 0:
        projectWarning(
            "No statistic files for site {}, sampling frequency {}, statistic {} and decimation level {}".format(
                site, sampleFreq, stat, declevel
            )
        )
        return None

    # evaluation frequency is read from the first measurement
    eFreq = statData[statMeas[0]].evalFreq[eFreqI]

    # windows to mask out, if a mask was named
    maskWindows = []
    if options["maskname"] != "":
        maskData = getMaskData(projData, site, options["maskname"], sampleFreq)
        maskWindows = maskData.getMaskWindowsFreq(declevel, eFreqI)

    # figure layout
    nrows, ncols = getPlotRowsAndCols(options["maxcols"], len(crossplots))
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    st = fig.suptitle(
        "{} density plots for {}, sampling frequency {} Hz,\ndecimation level {} and evaluation frequency {} Hz".format(
            stat, site, sampleFreq, declevel, eFreq
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    nbins = 200
    for plotIdx, pair in enumerate(crossplots):
        ax = plt.subplot(nrows, ncols, plotIdx + 1)
        plt.title("Crossplot {}".format(pair), fontsize=plotfonts["title"])

        # gather the two components across all measurements
        xValues = []
        yValues = []
        for meas in statMeas:
            measStats = statData[meas]
            stats = measStats.getStats(maskwindows=maskWindows)
            xData = np.squeeze(stats[:, eFreqI, measStats.winStats.index(pair[0])])
            yData = np.squeeze(stats[:, eFreqI, measStats.winStats.index(pair[1])])
            if xData.size == 0:
                continue
            if xData.size == 1:
                # squeeze leaves a 0-d array, which tolist would not return as a list
                xValues.extend([float(xData)])
                yValues.extend([float(yData)])
            else:
                xValues.extend(xData.tolist())
                yValues.extend(yData.tolist())
        xValues = np.array(xValues)
        yValues = np.array(yValues)

        # x limits: percentiles of the data unless provided by the user
        if len(options["xlim"]) == 0:
            xLow = np.percentile(xValues, 2)
            xHigh = np.percentile(xValues, 98)
            ax.set_xlim(xLow, xHigh)
            rangex = [xLow, xHigh]
        else:
            plt.xlim(options["xlim"])
            rangex = options["xlim"]

        # y limits: percentiles of the data unless provided by the user
        if len(options["ylim"]) == 0:
            yLow = np.percentile(yValues, 2)
            yHigh = np.percentile(yValues, 98)
            ax.set_ylim(yLow, yHigh)
            rangey = [yLow, yHigh]
        else:
            plt.ylim(options["ylim"])
            rangey = options["ylim"]

        plt.hist2d(
            xValues,
            yValues,
            bins=(nbins, nbins),
            range=[rangex, rangey],
            cmap=plt.cm.inferno,
        )

        # axis format
        plt.xlabel(pair[0], fontsize=plotfonts["axisLabel"])
        plt.ylabel(pair[1], fontsize=plotfonts["axisLabel"])
        plt.grid(True)
        for tickLabel in ax.get_xticklabels() + ax.get_yticklabels():
            tickLabel.set_fontsize(plotfonts["axisTicks"])

    # save and show
    if options["save"]:
        filename = "statDensityplot_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            fileFormatSampleFreq(sampleFreq),
            declevel,
            eFreqI,
            options["specdir"],
        )
        if options["maskname"] != "":
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(projData.imagePath, filename, fig)
        projectText("Image saved to file {}".format(savename))

    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    elif options["save"]:
        # saved but never shown: release the figure
        plt.close(fig)
        return None
    return fig
def calculateSpectra(projData: ProjectData, **kwargs) -> None:
    """Calculate spectra for the project time data

    The philosophy is that spectra are calculated out for all data and later limited using statistics and time constraints.

    For every requested site, sampling frequency and measurement, the time data is read, preprocessed (polarity reversal, scaling, calibration, filtering, notching), decimated and windowed. For each decimation level, the spectra of all windows are calculated and written out to a spectra file in the measurement spectra path under specdir, together with a comments file.

    Parameters
    ----------
    projData : ProjectData
        A project data object
    sites : str, List[str], optional
        Either a single site or a list of sites. Defaults to all project sites.
    sampleFreqs : int, float, List[float], optional
        The frequencies in Hz for which to calculate the spectra. Either a single frequency or a list of them. Defaults to all project sampling frequencies.
    chans : List[str], optional
        The channels for which to calculate out the spectra. When empty, the channels of each measurement reader are used.
    polreverse : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    scale : Dict[str, float]
        Keys are channels and values are floats to multiply the channel data by
    calibrate : bool, optional
        Flag whether to calibrate the data or not
    notch : List[float], optional
        List of frequencies to notch
    filter : Dict, optional
        Filter parameters
    specdir : str, optional
        The spectra directory to save the spectra data in
    ncores : int, optional
        The number of cores to use for the spectra calculations. A value greater than zero uses multiSpectra, otherwise calculateWindowSpectra is called directly.
    """
    from resistics.spectra.io import SpectrumWriter
    from resistics.decimate.decimator import Decimator
    from resistics.window.windower import Windower
    from resistics.project.shortcuts import (
        getCalibrator,
        getDecimationParameters,
        getWindowParameters,
    )
    from resistics.project.preprocess import (
        applyPolarisationReversalOptions,
        applyScaleOptions,
        applyCalibrationOptions,
        applyFilterOptions,
        applyNotchOptions,
    )

    # default options, overridden by any matching keyword arguments
    options = {}
    options["sites"] = projData.getSites()
    options["sampleFreqs"]: List[float] = projData.getSampleFreqs()
    options["chans"]: List[str] = []
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = True
    options["notch"]: List[float] = []
    options["filter"]: Dict = {}
    options["specdir"]: str = projData.config.configParams["Spectra"][
        "specdir"]
    options["ncores"] = projData.config.getSpectraCores()
    options = parseKeywords(options, kwargs)

    # prepare calibrator
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()

    # the project reference time is passed to both the windower and the spectrum writer
    datetimeRef = projData.refTime
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()
        # calculate spectra for each frequency
        for sampleFreq in options["sampleFreqs"]:
            measurements = siteData.getMeasurements(sampleFreq)
            projectText(
                "Site {} has {:d} measurement(s) at sampling frequency {:.2f}".
                format(site, len(measurements), sampleFreq))
            if len(measurements) == 0:
                continue  # no data files at this sample rate
            for meas in measurements:
                projectText(
                    "Calculating spectra for site {} and measurement {}".
                    format(site, meas))
                # get measurement start and end times - this is the time of the first and last sample
                reader = siteData.getMeasurement(meas)
                startTime = siteData.getMeasurementStart(meas)
                stopTime = siteData.getMeasurementEnd(meas)
                # use the requested channels if any were given, otherwise all channels of the reader
                dataChans = (options["chans"] if len(options["chans"]) > 0 else
                             reader.getChannels())
                timeData = reader.getPhysicalData(startTime,
                                                  stopTime,
                                                  chans=dataChans)
                timeData.addComment(breakComment())
                timeData.addComment("Calculating project spectra")
                timeData.addComment(projData.config.getConfigComment())
                # apply various options
                # NOTE(review): the return values of these helpers are discarded;
                # presumably they modify timeData in place - confirm
                applyPolarisationReversalOptions(options, timeData)
                applyScaleOptions(options, timeData)
                applyCalibrationOptions(options, cal, timeData, reader)
                applyFilterOptions(options, timeData)
                applyNotchOptions(options, timeData)
                # define decimation and window parameters
                decParams = getDecimationParameters(sampleFreq,
                                                    projData.config)
                numLevels = decParams.numLevels
                winParams = getWindowParameters(decParams, projData.config)
                dec = Decimator(timeData, decParams)
                timeData.addComment(
                    "Decimating with {} levels and {} frequencies per level".
                    format(numLevels, decParams.freqPerLevel))
                # loop through decimation levels
                for declevel in range(0, numLevels):
                    # get the data for the current level
                    check = dec.incrementLevel()
                    if not check:
                        break  # not enough data
                    # from here on timeData refers to the decimator's data for this level
                    timeData = dec.timeData
                    # create the windower and give it window parameters for current level
                    sampleFreqDec = dec.sampleFreq
                    win = Windower(
                        datetimeRef,
                        timeData,
                        winParams.getWindowSize(declevel),
                        winParams.getOverlap(declevel),
                    )
                    if win.numWindows < 2:
                        break  # do no more decimation
                    # print information and add some comments
                    projectText(
                        "Calculating spectra for decimation level {}".format(
                            declevel))
                    timeData.addComment(
                        "Evaluation frequencies for this level {}".format(
                            listToString(
                                decParams.getEvalFrequenciesForLevel(
                                    declevel))))
                    timeData.addComment(
                        "Windowing with window size {} samples and overlap {} samples"
                        .format(
                            winParams.getWindowSize(declevel),
                            winParams.getOverlap(declevel),
                        ))
                    if projData.config.configParams["Spectra"]["applywindow"]:
                        timeData.addComment(
                            "Performing fourier transform with window function {}"
                            .format(projData.config.configParams["Spectra"]
                                    ["windowfunc"]))
                    else:
                        timeData.addComment(
                            "Performing fourier transform with no window function"
                        )
                    # collect time data
                    timeDataList = []
                    for iW in range(0, win.numWindows):
                        timeDataList.append(win.getData(iW))
                    # open spectra file for saving
                    specPath = os.path.join(
                        siteData.getMeasurementSpecPath(meas),
                        options["specdir"])
                    specWrite = SpectrumWriter(specPath, datetimeRef)
                    specWrite.openBinaryForWriting(
                        "spectra",
                        declevel,
                        sampleFreqDec,
                        winParams.getWindowSize(declevel),
                        winParams.getOverlap(declevel),
                        win.winOffset,
                        win.numWindows,
                        dataChans,
                    )
                    # calculate the spectra of all the windows, using multiSpectra when ncores > 0
                    if options["ncores"] > 0:
                        specDataList = multiSpectra(
                            options["ncores"],
                            timeDataList,
                            sampleFreqDec,
                            winParams.getWindowSize(declevel),
                            projData.config.configParams,
                        )
                    else:
                        specDataList = calculateWindowSpectra(
                            timeDataList,
                            sampleFreqDec,
                            winParams.getWindowSize(declevel),
                            projData.config.configParams,
                        )
                    # write out to spectra file
                    for iW in range(0, win.numWindows):
                        specWrite.writeBinary(specDataList[iW])
                    # the comments accumulated on timeData are saved alongside the spectra
                    specWrite.writeCommentsFile(timeData.getComments())
                    specWrite.closeFile()
def viewSpectraStack(projData: ProjectData, site: str, meas: str, **kwargs) -> Union[Figure, None]:
    """View spectra stacks for a measurement

    The windows of the measurement spectra are split into numstacks
    consecutive sets of equal size. For each set, the window amplitudes and
    phases are averaged and plotted per channel, with optional coherence plots
    between channel pairs. The set size is the integer part of the number of
    windows divided by numstacks, so any remaining windows at the end are not
    plotted.

    Parameters
    ----------
    projData : projecData
        The project data
    site : str
        The site to view
    meas: str
        The measurement of the site to view
    chans : List[str], optional
        Channels to plot
    declevel : int, optional
        Decimation level to plot
    numstacks : int, optional
        The number of window sets to stack. If there are fewer windows than
        stacks, this is reduced to the number of windows.
    coherences : List[List[str]], optional
        A list of coherences to add, specified as [["Ex", "Hy"], ["Ey", "Hx"]]
    specdir : str, optional
        String that specifies spectra directory for the measurement
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. If no data was found, then None is returned.
    """
    from resistics.common.plot import savePlot, plotOptionsSpec, colorbarMultiline

    options = {}
    options["chans"] = []
    options["declevel"] = 0
    options["numstacks"] = 10
    options["coherences"] = []
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText("Plotting spectra stack for measurement {} and site {}".format(meas, site))
    specReader = getSpecReader(projData, site, meas, **options)
    if specReader is None:
        return None

    # channels
    dataChans = specReader.getChannels()
    if len(options["chans"]) > 0:
        dataChans = options["chans"]
    numChans = len(dataChans)

    # get windows
    numWindows = specReader.getNumWindows()
    sampleFreqDec = specReader.getSampleFreq()
    f = specReader.getFrequencyArray()
    dataSize = specReader.getDataSize()

    # calculate num of windows to stack in each set
    stackSize = int(np.floor(1.0 * numWindows / options["numstacks"]))
    if stackSize == 0:
        projectWarning("Too few windows for number of stacks {}".format(options["numstacks"]))
        options["numstacks"] = numWindows
        stackSize = 1
        projectWarning("Number of stacks changed to {}".format(options["numstacks"]))

    # calculate number of rows - in case interested in coherences too
    # np.ceil returns a float but the subplot grid needs an integer row count
    nrows = 2
    if len(options["coherences"]) > 0:
        nrows += int(np.ceil(1.0 * len(options["coherences"]) / numChans))

    # setup the figure
    plotfonts = options["plotoptions"]["plotfonts"]
    cmap = colorbarMultiline()
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    st = fig.suptitle(
        "Spectra stack, fs = {:.6f} [Hz], decimation level = {:2d}, windows in each set = {:d}".format(
            sampleFreqDec, options["declevel"], stackSize
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    ampLim = options["plotoptions"]["amplim"]

    # do the stacking
    for iP in range(0, options["numstacks"]):
        stackStart = iP * stackSize
        stackStop = min(stackStart + stackSize, numWindows)
        stackCount = stackStop - stackStart
        color = cmap(iP / options["numstacks"])

        # dictionaries to hold data for this section
        stackedData = {}
        ampData = {}
        phaseData = {}
        powerData = {}
        # assign initial zeros - amplitudes and phases are real quantities,
        # only the stacked spectra and cross powers need to be complex
        for c in dataChans:
            stackedData[c] = np.zeros(shape=(dataSize), dtype="complex")
            ampData[c] = np.zeros(shape=(dataSize), dtype="float")
            phaseData[c] = np.zeros(shape=(dataSize), dtype="float")
            for c2 in dataChans:
                powerData[c + c2] = np.zeros(shape=(dataSize), dtype="complex")

        # now stack the data and create nice plots
        for iW in range(stackStart, stackStop):
            winData = specReader.readBinaryWindowLocal(iW)
            for c in dataChans:
                stackedData[c] += winData.data[c]
                ampData[c] += np.absolute(winData.data[c])
                phaseData[c] += np.angle(winData.data[c]) * (180.0 / np.pi)
                # get coherency data
                for c2 in dataChans:
                    powerData[c + c2] += winData.data[c] * np.conjugate(winData.data[c2])
            if iW == stackStart:
                startTime = winData.startTime
            if iW == stackStop - 1:
                stopTime = winData.stopTime

        # the same legend label is used for every line of this stack
        stackLabel = "{} to {}".format(
            startTime.strftime("%m-%d %H:%M:%S"),
            stopTime.strftime("%m-%d %H:%M:%S"),
        )

        # scale powers and stacks
        for idx, c in enumerate(dataChans):
            stackedData[c] = stackedData[c] / stackCount
            ampData[c] = ampData[c] / stackCount
            phaseData[c] = phaseData[c] / stackCount
            for c2 in dataChans:
                # normalisation
                powerData[c + c2] = 2 * powerData[c + c2] / stackCount
                # normalisation of the first and last frequency samples
                powerData[c + c2][[0, -1]] = powerData[c + c2][[0, -1]] / 2

            # plot amplitude
            ax1 = plt.subplot(nrows, numChans, idx + 1)
            plt.title("Amplitude {}".format(c), fontsize=plotfonts["title"])
            ax1.semilogy(f, ampData[c], color=color, label=stackLabel)
            if len(ampLim) == 2:
                ax1.set_ylim(ampLim)
            else:
                ax1.set_ylim(0.01, 1000)
            ax1.set_xlim(0, sampleFreqDec / 2.0)
            if isMagnetic(c):
                ax1.set_ylabel("Amplitude [nT]", fontsize=plotfonts["axisLabel"])
            else:
                ax1.set_ylabel("Amplitude [mV/km]", fontsize=plotfonts["axisLabel"])
            ax1.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax1.get_xticklabels() + ax1.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

            # plot phase
            ax2 = plt.subplot(nrows, numChans, numChans + idx + 1)
            plt.title("Phase {}".format(c), fontsize=plotfonts["title"])
            ax2.plot(f, phaseData[c], color=color, label=stackLabel)
            ax2.set_ylim(-180, 180)
            ax2.set_xlim(0, sampleFreqDec / 2.0)
            ax2.set_ylabel("Phase [degrees]", fontsize=plotfonts["axisLabel"])
            ax2.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax2.get_xticklabels() + ax2.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

        # plot coherences
        for idx, coh in enumerate(options["coherences"]):
            c = coh[0]
            c2 = coh[1]
            cohNom = np.power(np.absolute(powerData[c + c2]), 2)
            # auto powers have zero imaginary part, so use the real part to
            # obtain a real valued coherence for plotting
            cohDenom = np.real(powerData[c + c]) * np.real(powerData[c2 + c2])
            coherence = cohNom / cohDenom
            ax = plt.subplot(nrows, numChans, 2 * numChans + idx + 1)
            plt.title("Coherence {} - {}".format(c, c2), fontsize=plotfonts["title"])
            ax.plot(f, coherence, color=color, label=stackLabel)
            ax.set_ylim(0, 1.1)
            ax.set_xlim(0, sampleFreqDec / 2)
            ax.set_ylabel("Coherence", fontsize=plotfonts["axisLabel"])
            ax.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax.get_xticklabels() + ax.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

    # fig legend and layout - take the handles from the current (last used) axes
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    fig.tight_layout(rect=[0.01, 0.01, 0.98, 0.81])
    # the legend is drawn in its own frameless axes above the plots
    legax = plt.axes(position=[0.01, 0.82, 0.98, 0.12], in_layout=False)
    plt.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
    plt.box(False)
    legax.legend(handles, labels, ncol=4, loc="upper center", fontsize=plotfonts["legend"])

    # plot show and save
    if options["save"]:
        impath = projData.imagePath
        filename = "spectraStack_{}_{}_dec{}_{}".format(
            site, meas, options["declevel"], options["specdir"])
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        plt.close(fig)
        return None
    return fig
def viewSpectraSection(projData: ProjectData, site: str, meas: str, **kwargs) -> Union[Figure, None]:
    """View spectra section for a measurement

    Plots the spectral amplitude of each channel as a colour section of
    frequency against window start time. When the measurement has more than
    250 windows, 250 evenly spaced windows are plotted instead of all of them.

    Parameters
    ----------
    projData : projecData
        The project data
    site : str
        The site to view
    meas: str
        The measurement of the site to view
    chans : List[str], optional
        Channels to plot
    declevel : int, optional
        Decimation level to plot
    specdir : str, optional
        String that specifies spectra directory for the measurement
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. If no data was found, then None is returned.
    """
    from matplotlib.colors import LogNorm
    from resistics.common.plot import savePlot, plotOptionsSpec, colorbar2dSpectra

    options = {
        "chans": [],
        "declevel": 0,
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "show": True,
        "save": False,
        "plotoptions": plotOptionsSpec(),
    }
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting spectra section for measurement {} and site {}".format(meas, site)
    )
    specReader = getSpecReader(projData, site, meas, **options)
    if specReader is None:
        return None

    # channels: user selection takes precedence over the channels in the file
    readerChans = specReader.getChannels()
    dataChans = options["chans"] if len(options["chans"]) > 0 else readerChans
    numChans = len(dataChans)

    # window and frequency information
    numWindows = specReader.getNumWindows()
    sampleFreqDec = specReader.getSampleFreq()
    f = specReader.getFrequencyArray()

    # limit the section to 250 windows
    if numWindows <= 250:
        windows = np.arange(0, numWindows)
    else:
        windows = list(
            np.linspace(0, numWindows, 250, endpoint=False, dtype=np.int32)
        )

    # create figure
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    titleText = "Spectra section, site = {}, meas = {}, fs = {:.2f} [Hz], decimation level = {:2d}, windows = {:d}, {} to {}".format(
        site,
        meas,
        sampleFreqDec,
        options["declevel"],
        len(windows),
        windows[0],
        windows[-1],
    )
    st = fig.suptitle(titleText, fontsize=plotfonts["suptitle"])
    st.set_y(0.98)

    # read the spectra of the selected windows and remember their start times
    specData = np.empty(
        shape=(len(windows), numChans, specReader.getDataSize()), dtype="complex"
    )
    dates = []
    for row, iW in enumerate(windows):
        winData = specReader.readBinaryWindowLocal(iW)
        for col, chan in enumerate(dataChans):
            specData[row, col, :] = winData.data[chan]
        dates.append(winData.startTime)

    ampLim = options["plotoptions"]["amplim"]
    for col, chan in enumerate(dataChans):
        ax = plt.subplot(1, numChans, col + 1)
        plotData = np.transpose(np.absolute(np.squeeze(specData[:, col, :])))
        # colour scale: user limits if two were given, otherwise the data range
        if len(ampLim) == 2:
            colourNorm = LogNorm(vmin=ampLim[0], vmax=ampLim[1])
        else:
            colourNorm = LogNorm(vmin=plotData.min(), vmax=plotData.max())
        plt.pcolor(dates, f, plotData, norm=colourNorm, cmap=colorbar2dSpectra())
        cb = plt.colorbar()
        cb.ax.tick_params(labelsize=plotfonts["axisTicks"])
        # set axis limits
        ax.set_ylim(0, specReader.getSampleFreq() / 2.0)
        ax.set_xlim([dates[0], dates[-1]])
        # the title carries the amplitude unit of the channel type
        titleFormat = (
            "Amplitude {} [nT]" if isMagnetic(chan) else "Amplitude {} [mV/km]"
        )
        plt.title(titleFormat.format(chan), fontsize=plotfonts["title"])
        ax.set_ylabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
        ax.set_xlabel("Time", fontsize=plotfonts["axisLabel"])
        for tickLabel in ax.get_xticklabels() + ax.get_yticklabels():
            tickLabel.set_fontsize(plotfonts["axisTicks"])
        plt.grid(True)

    # plot format
    fig.autofmt_xdate(rotation=90, ha="center")
    fig.tight_layout(rect=[0.02, 0.02, 0.96, 0.92])

    # save and show
    if options["save"]:
        filename = "spectraSection_{}_{}_dec{}_{}".format(
            site, meas, options["declevel"], options["specdir"]
        )
        savename = savePlot(projData.imagePath, filename, fig)
        projectText("Image saved to file {}".format(savename))

    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    elif options["save"]:
        # saved but never shown: release the figure
        plt.close(fig)
        return None
    return fig
def viewStatistic(
    projData: ProjectData,
    site: str,
    sampleFreq: Union[int, float],
    stat: str,
    **kwargs
) -> Union[Figure, None]:
    """View statistic data for a single sampling frequency of a site

    The statistic values of every measurement with statistic data are drawn
    into a single figure, one labelled series per measurement.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    declevel : int, optional
        The decimation level to plot
    eFreqI : int, optional
        The evaluation frequency index
    specdir : str, optional
        The spectra directory
    maskname : str, optional
        Mask name
    clim : List, optional
        Limits for colourbar axis
    xlim : List, optional
        Limits for the x axis. When empty, the earliest measurement start and latest measurement end are used.
    ylim : List, optional
        Limits for the y axis
    colortitle : str, optional
        Title for the colourbar
    show : bool, optional
        Show the plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. If no data was found, None.
    """
    from resistics.common.plot import savePlot, plotOptionsSpec, getPlotRowsAndCols

    options = {
        "declevel": 0,
        "eFreqI": 0,
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "maskname": "",
        "clim": [],
        "xlim": [],
        "ylim": [],
        "colortitle": "",
        "show": True,
        "save": False,
        "plotoptions": plotOptionsSpec(),
    }
    options = parseKeywords(options, kwargs)
    declevel = options["declevel"]
    eFreqI = options["eFreqI"]

    projectText(
        "Plotting statistic {} for site {} and sampling frequency {}".format(
            stat, site, sampleFreq
        )
    )

    statData = getStatisticDataForSampleFreq(
        projData, site, sampleFreq, stat, declevel=declevel, specdir=options["specdir"]
    )
    statMeas = list(statData.keys())
    if len(statMeas) == 0:
        projectWarning(
            "No statistic files for site {}, sampling frequency {}, statistic {} and decimation level {}".format(
                site, sampleFreq, stat, declevel
            )
        )
        return None

    # evaluation frequency is read from the first measurement
    eFreq = statData[statMeas[0]].evalFreq[eFreqI]

    # windows to mask out, if a mask was named
    maskWindows = []
    if options["maskname"] != "":
        maskData = getMaskData(projData, site, options["maskname"], sampleFreq)
        maskWindows = maskData.getMaskWindowsFreq(declevel, eFreqI)

    # setup the figure
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])

    # without user limits, the x axis spans all the measurements
    siteData = projData.getSiteData(site)
    if len(options["xlim"]) == 0:
        earliest = min(siteData.getMeasurementStart(meas) for meas in statMeas)
        latest = max(siteData.getMeasurementEnd(meas) for meas in statMeas)
        options["xlim"] = [earliest, latest]

    # one series per measurement, all in the same figure
    for meas, measStats in statData.items():
        measStats.view(
            eFreqI,
            fig=fig,
            xlim=options["xlim"],
            ylim=options["ylim"],
            clim=options["clim"],
            label=meas,
            plotfonts=plotfonts,
            maskwindows=maskWindows,
        )
    # add a legend
    plt.legend(markerscale=4, fontsize=plotfonts["legend"])

    # the title is added after all the plots
    titleText = "{} values for {}, sampling frequency = {:.2f} Hz, decimation level = {} and evaluation frequency {} Hz".format(
        stat, site, sampleFreq, declevel, eFreq
    )
    fig.suptitle(titleText, fontsize=plotfonts["suptitle"])
    fig.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])

    # save and show
    if options["save"]:
        filename = "stat_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            fileFormatSampleFreq(sampleFreq),
            declevel,
            eFreqI,
            options["specdir"],
        )
        if options["maskname"] != "":
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(projData.imagePath, filename, fig)
        projectText("Image saved to file {}".format(savename))

    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    elif options["save"]:
        # saved but never shown: release the figure
        plt.close(fig)
        return None
    return fig
def chatterjeeMachlerHadi(X, y, **kwargs):
    r"""Regression based on Hadi distances

    Another regression method based on Hadi distances implemented from the paper
    A Re-Weighted Least Squares Method for Robust Regression Estimation
    Billor, Hadi

    The method has two parts. First, distances of the observations in
    predictor space are calculated from a growing basic subset of observations
    assumed to be free of outliers. These distances give the initial weights.
    Second, an iteratively re-weighted least squares is performed.

    Parameters
    ----------
    X : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term
    maxiter : int, optional
        Maximum number of re-weighted least squares iterations

    Returns
    -------
    params : np.ndarray
        The regression parameters
    resids : np.ndarray
        The squared absolute residuals of the last iteration, normalised to sum to one.
        If the residuals are all (near) zero, the raw residuals are returned instead.
    weights : np.ndarray
        The final observation weights
    """
    import scipy.stats as stats
    from resistics.common.math import eps
    from resistics.regression.robust import defaultOptions, applyWeights
    import numpy.linalg as linalg

    # basic info
    options = parseKeywords(defaultOptions(), kwargs, printkw=False)

    # for the distances, will use absX - do this before adding intercept term
    # a column of all ones will cause problems with non full rank covariance matrices
    absX = np.absolute(X)
    # now calculate p and n
    n = absX.shape[0]
    p = absX.shape[1]

    # we treat the X matrix as a multivariate matrix with n observations and p variables
    # first need to find a basic subset free of outliers
    correctionFactor = 1 + (1.0 * (p + 1) / (n - p)) + (2.0 / (n - 1 - 3 * p))
    chi = stats.chi2(p, 0)
    alpha = 0.05
    # NOTE(review): this uses the chi-squared density at alpha/n as the cutoff;
    # an outlier cutoff would normally be a quantile (ppf) - confirm against the paper
    chi2bound = correctionFactor * chi.pdf(alpha / n)

    # calculate h, this is the size of the first basic subset
    # note that this is the value h, the index of the hth element is h-1
    h = int(1.0 * (n + p + 1) / 2)  # here, only want the integer part of this

    # coordinatewise medians - the median of each column (one value per variable)
    medians = np.median(absX, axis=0)
    # scatter matrix about the medians to help calculate the distance
    centred = absX - medians
    A = np.dot(centred.T, centred) / (n - 1)
    # now calculate initial distances
    dInit = calculateDistCMH(n, absX, medians, A)

    # now get the h smallest values of d and recompute from their column means
    indices = np.argsort(dInit)[:h]
    means = np.average(absX[indices, :], axis=0)
    # observations in rows, columns are variables
    covariance = np.cov(absX[indices], rowvar=False)
    dH = calculateDistCMH(n, absX, means, covariance)

    # rearrange the n observations into order and take the p+1 closest
    # observations as the initial basic subset
    # there is a rank issue here, but ignore for now - natural observations will presumably be full rank
    indicesBasic = np.argsort(dH)[: p + 1]
    means = np.average(absX[indicesBasic, :], axis=0)
    covariance = np.cov(absX[indicesBasic], rowvar=False)
    dist = calculateDistCMH(n, absX, means, covariance)

    # grow the basic subset up to h observations
    # the increment limits this to roughly 100 iterations and must be an
    # integer as r is used to slice the sorted indices
    r = p + 2
    increment = max(1, (h - r) // 100)
    while r <= h:
        indices = np.argsort(dist)[:r]
        means = np.average(absX[indices], axis=0)
        covariance = np.cov(absX[indices], rowvar=False)
        dist = calculateDistCMH(n, absX, means, covariance)
        if 0 < h - r < increment:
            r = h
        else:
            r += increment

    # now the second part = add more points and exclude outliers to basic set
    # all distances above r+1 = outliers
    while r < n:
        sortOrder = np.argsort(dist)
        if np.power(dist[sortOrder[r]], 2) > chi2bound:
            # then leave, everything else is an outlier
            break
        # otherwise, continue adding points
        indices = sortOrder[:r]
        means = np.average(absX[indices], axis=0)
        covariance = np.cov(absX[indices], rowvar=False)
        dist = calculateDistCMH(n, absX, means, covariance)
        if 0 < n - 1 - r < increment:
            r = n - 1
        else:
            r += increment

    # now with the Hadi distances calculated, can proceed to do the robust regression
    # normalise and manipulate Hadi distances
    dist = dist / np.max(dist)
    # the paper suggests using the median of the complete set of distances
    distMedian = np.median(dist)
    tmp = np.maximum(dist, np.ones(shape=(n)) * distMedian)
    dist = np.reciprocal(tmp)
    dist2 = np.power(dist, 2)
    dist = dist2 / np.sum(dist2)

    # calculate first set of weights - this is simply dist
    weights = dist

    # now add the additional constant intercept column if required
    if options["intercept"]:
        # add column of ones for constant term
        X = np.hstack((np.ones(shape=(X.shape[0], 1), dtype="complex"), X))
    n = X.shape[0]

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(X, y, weights)
        paramsNew, _squareResid, _rank, _s = linalg.lstsq(Anew, ynew, rcond=None)
        residsNew = y - np.dot(X, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights

        # from here on residsNew holds the normalised squared residuals
        residsAbs = np.absolute(residsNew)
        residsSquare = np.power(residsAbs, 2)
        residsNew = residsSquare / np.sum(residsSquare)
        residsMedian = np.median(residsAbs)

        # calculate the new weights
        tmpDenom = np.maximum(residsNew, np.ones(shape=(n), dtype="float") * residsMedian)
        tmp = (1 - dist) / tmpDenom
        tmpSquare = np.power(tmp, 2)
        weightsNew = tmpSquare / np.sum(tmpSquare)

        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # from the second iteration, stop once the relative change is below tolerance
        converged = False
        if iteration > 1:
            changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
            converged = changeResids < eps()
        # update resids
        resids = residsNew
        if converged:
            break

    # at the end, return the components
    return params, resids, weights
def viewStatisticHistogram(
    projData: ProjectData, site: str, sampleFreq: float, stat: str, **kwargs
) -> Union[Figure, None]:
    """View statistic histograms for a single sampling frequency of a site

    One subplot is drawn per statistic component. The values of every
    measurement found for the site and sampling frequency are pooled before
    binning and non-finite values (inf, nan) are discarded.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    declevel : int, optional
        The decimation level to plot (default 0)
    eFreqI : int, optional
        The evaluation frequency index (default 0)
    specdir : str, optional
        The spectra directory (default taken from the project configuration)
    maskname : str, optional
        Mask name. An empty string (the default) means no mask is applied
    numbins : int, optional
        The number of bins for the histogram data binning (default 40)
    xlim : List, optional
        Limits for the x axis. When empty, the limits are the minimum and maximum of the plotted data
    maxcols : int, optional
        The maximum number of columns in the plots (default 4)
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None. If no data was found, None.
    """
    from resistics.common.plot import savePlot, plotOptionsSpec, getPlotRowsAndCols

    options = {}
    options["declevel"] = 0
    options["eFreqI"] = 0
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["maskname"] = ""
    options["numbins"] = 40
    options["xlim"] = []
    options["maxcols"] = 4
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting histogram for statistic {}, site {} and sampling frequency {}".format(
            stat, site, sampleFreq
        )
    )

    statData = getStatisticDataForSampleFreq(
        projData,
        site,
        sampleFreq,
        stat,
        declevel=options["declevel"],
        specdir=options["specdir"],
    )
    statMeas = list(statData.keys())
    if len(statMeas) == 0:
        projectWarning(
            "No statistic files for site {}, sampling frequency {}, statistic {} and decimation level {}".format(
                site, sampleFreq, stat, options["declevel"]
            )
        )
        return None
    # the statistic components and evaluation frequency are read from the first measurement
    statComponents = statData[statMeas[0]].winStats
    eFreq = statData[statMeas[0]].evalFreq[options["eFreqI"]]
    # get the mask data
    maskWindows = []
    if options["maskname"] != "":
        maskData = getMaskData(projData, site, options["maskname"], sampleFreq)
        maskWindows = maskData.getMaskWindowsFreq(
            options["declevel"], options["eFreqI"]
        )
    # plot information
    nrows, ncols = getPlotRowsAndCols(options["maxcols"], len(statComponents))
    numbins = options["numbins"]
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    # suptitle
    st = fig.suptitle(
        "{} histogram for {}, sampling frequency {} Hz, decimation level {} and evaluation frequency {} Hz".format(
            stat, site, sampleFreq, options["declevel"], eFreq
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # now plot the data
    for idx, val in enumerate(statComponents):
        ax = plt.subplot(nrows, ncols, idx + 1)
        plt.title("Histogram {}".format(val), fontsize=plotfonts["title"])
        # gather the values of every measurement and join them once; the empty
        # float seed keeps the result a float array even without measurements data
        pieces = [np.empty(shape=(0))]
        for meas in statMeas:
            stats = statData[meas].getStats(maskwindows=maskWindows)
            # squeeze collapses a single-window column to a 0-d array, which
            # np.concatenate rejects, so promote it back to 1-d
            pieces.append(
                np.atleast_1d(np.squeeze(stats[:, options["eFreqI"], idx]))
            )
        plotData = np.concatenate(pieces)
        # remove infinities and nans
        plotData = plotData[np.isfinite(plotData)]
        haveData = plotData.size > 0
        # x axis options
        if len(options["xlim"]) > 0:
            xlim = options["xlim"]
        elif haveData:
            xlim = [np.min(plotData), np.max(plotData)]
        else:
            # np.min / np.max raise on an empty array, so leave the axis limits alone
            xlim = None
            projectWarning(
                "No finite data to plot for statistic component {}".format(val)
            )
        if xlim is not None:
            plt.xlim(xlim)
        plt.xlabel("Value", fontsize=plotfonts["axisLabel"])
        # now plot with xlim in mind
        if haveData:
            plt.hist(plotData, numbins, range=xlim, facecolor="red", alpha=0.75)
        plt.grid()
        # y axis options
        plt.ylabel("Count", fontsize=plotfonts["axisLabel"])
        # set tick sizes
        for label in ax.get_xticklabels() + ax.get_yticklabels():
            label.set_fontsize(plotfonts["axisTicks"])

    # plot format, show and save
    fig.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])
    if options["save"]:
        impath = projData.imagePath
        sampleFreqStr = fileFormatSampleFreq(sampleFreq)
        filename = "statHist_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            sampleFreqStr,
            options["declevel"],
            options["eFreqI"],
            options["specdir"],
        )
        if options["maskname"] != "":
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        # saved but never displayed: release the figure and signal that with None
        plt.close(fig)
        return None
    return fig
def calculateStatistics(projData: ProjectData, **kwargs):
    """Calculate and write out window statistics for the spectra of project sites

    Every measurement of every requested site is visited. For each decimation
    level with a spectra file, the requested statistics are calculated for all
    windows in the file and written to the measurement statistic path under
    the spectra directory name.

    Parameters
    ----------
    projData : ProjectData
        A project data instance
    sites : List[str], optional
        A list of sites to calculate statistics for
    sampleFreqs : List[float], optional
        List of sampling frequencies for which to calculate statistics
    specdir : str, optional
        The spectra directory for which to calculate statistics
    stats : List[str], optional
        The statistics to calculate out. Acceptable values are: "absvalEqn" "coherence", "psd", "poldir", "transFunc", "resPhase", "partialcoh". Configuration file values are used by default.
    ncores : int, optional
        The number of cores to run the statistic calculations on. A value greater than zero uses multiStatistics, otherwise windows are processed one at a time.
    """
    from resistics.statistics.io import StatisticIO
    from resistics.project.shortcuts import getDecimationParameters

    options = parseKeywords(
        {
            "sites": projData.getSites(),
            "sampleFreqs": projData.getSampleFreqs(),
            "chans": [],
            "specdir": projData.config.configParams["Spectra"]["specdir"],
            "stats": projData.config.configParams["Statistics"]["stats"],
            "ncores": projData.config.getStatisticCores(),
        },
        kwargs,
    )
    statNames = options["stats"]
    specdir = options["specdir"]

    projectText(
        "Calculating stats: {} for sites: {}".format(
            listToString(statNames), listToString(options["sites"])
        )
    )

    writer = StatisticIO()
    for site in options["sites"]:
        siteInfo = projData.getSiteData(site)
        for meas in siteInfo.getMeasurements():
            fs = siteInfo.getMeasurementSampleFreq(meas)
            if fs not in options["sampleFreqs"]:
                continue

            projectText(
                "Calculating stats for site {}, measurement {}".format(site, meas)
            )
            decimation = getDecimationParameters(fs, projData.config)
            levelCount = decimation.numLevels
            reader = SpectrumReader(
                os.path.join(siteInfo.getMeasurementSpecPath(meas), specdir)
            )

            for declevel in range(levelCount):
                # levels without a spectra file are skipped
                if not reader.openBinaryForReading("spectra", declevel):
                    continue

                refTime = reader.getReferenceTime()
                winSize = reader.getWindowSize()
                winOlap = reader.getWindowOverlap()
                numWindows = reader.getNumWindows()
                fsDec = reader.getSampleFreq()
                evalFreq = decimation.getEvalFrequenciesForLevel(declevel)

                # one statistic container per requested statistic
                collected = {}
                for stat in statNames:
                    elements = getStatElements(stat)
                    holder = StatisticData(stat, refTime, fsDec, winSize, winOlap)
                    collected[stat] = holder
                    holder.setStatParams(numWindows, elements, evalFreq)
                    holder.comments = reader.getComments()
                    holder.addComment(projData.config.getConfigComment())
                    holder.addComment("Calculating statistic: {}".format(stat))
                    holder.addComment(
                        "Statistic components: {}".format(listToString(elements))
                    )

                # read every window of this level in a single batch
                spectraData, globalIndices = reader.readBinaryBatchGlobal()
                if options["ncores"] > 0:
                    results = multiStatistics(
                        options["ncores"], spectraData, evalFreq, statNames
                    )
                    for iW in range(numWindows):
                        for stat in statNames:
                            collected[stat].addStat(
                                iW, globalIndices[iW], results[iW][stat]
                            )
                else:
                    calculator = StatisticCalculator()
                    for iW in range(numWindows):
                        windowSpectra = spectraData[iW]
                        windowResult = calculateWindowStatistics(
                            windowSpectra,
                            evalFreq,
                            statNames,
                            statCalculator=calculator,
                        )
                        for stat in statNames:
                            collected[stat].addStat(
                                iW, globalIndices[iW], windowResult[stat]
                            )
                reader.closeFile()

                # write each statistic for this decimation level
                for stat in statNames:
                    outPath = os.path.join(
                        siteInfo.getMeasurementStatPath(meas), specdir
                    )
                    writer.setDatapath(outPath)
                    writer.write(collected[stat], declevel)
def calculateMask(projData: ProjectData, maskData: MaskData, **kwargs):
    """Calculate and write masks for sites

    Sites which have no data at the sampling frequency of the mask are
    skipped. For the remaining sites the mask constraints are applied, the
    mask is written to the site's mask path for the spectra directory and a
    window selector using the new mask is run and its information printed as
    a check.

    Parameters
    ----------
    projData : projectData
        A project instance
    maskData : MaskData
        A mask data instance
    sites : List[str], optional
        A list of sites to calculate masks for
    specdir : str, optional
        The spectra directory for which to calculate statistics
    """
    options = parseKeywords(
        {
            "sites": projData.getSites(),
            "specdir": projData.config.configParams["Spectra"]["specdir"],
        },
        kwargs,
    )
    specdir = options["specdir"]

    calculator = MaskCalculator(projData, maskData, specdir=specdir)
    writer = MaskIO()
    maskFs = maskData.sampleFreq

    # names of the window selector reports, printed in this order
    selectorReports = (
        "printInfo",
        "printDatetimeConstraints",
        "printWindowMasks",
        "printSharedWindows",
        "printWindowsForFrequency",
    )

    for site in options["sites"]:
        siteInfo = projData.getSiteData(site)
        # only sites recorded at the mask sampling frequency are processed
        if maskFs not in siteInfo.getSampleFreqs():
            continue

        # decimation and window parameters
        decimation = getDecimationParameters(maskFs, projData.config)
        decimation.printInfo()
        windowing = getWindowParameters(decimation, projData.config)

        # start from a clean set of mask windows, then apply the constraints
        calculator.clearMaskWindows()
        calculator.applyConstraints(site)
        calculator.maskData.printInfo()

        # write out the mask
        writer.datapath = os.path.join(siteInfo.getSpecdirMaskPath(specdir))
        writer.write(calculator.maskData)

        # test the mask with a window selector
        selector = WindowSelector(
            projData, maskFs, decimation, windowing, specdir=specdir
        )
        selector.setSites([site])
        selector.addWindowMask(site, maskData.maskName)
        selector.calcSharedWindows()
        for reportName in selectorReports:
            getattr(selector, reportName)()
def calculateRemoteStatistics(projData: ProjectData, remoteSite: str, **kwargs):
    """Calculate statistics involving a remote reference site

    For each measurement of each requested site and each decimation level with
    a spectra file, statistics are calculated only for the windows which are
    in the spectra file and are also shared between the site and the remote
    site. The results are written to the measurement statistic path under the
    spectra directory name.

    Parameters
    ----------
    projData : ProjectData
        A project data instance
    remoteSite : str
        The name of the site to use as the remote site
    sites : List[str], optional
        A list of sites to calculate statistics for
    sampleFreqs : List[float], optional
        List of sampling frequencies for which to calculate statistics
    specdir : str, optional
        The spectra directory for which to calculate statistics
    remotestats : List[str], optional
        The statistics to calculate out. Acceptable statistics are: "RR_coherence", "RR_coherenceEqn", "RR_absvalEqn", "RR_transferFunction", "RR_resPhase". Configuration file values are used by default.
    ncores : int, optional
        The number of cores to run the statistic calculations on. A value greater than zero uses multiStatistics, otherwise windows are processed one at a time.
    """
    from resistics.statistics.io import StatisticIO
    from resistics.statistics.calculator import StatisticCalculator
    from resistics.project.shortcuts import (
        getDecimationParameters,
        getWindowParameters,
        getWindowSelector,
    )

    options = {}
    options["sites"] = projData.getSites()
    options["sampleFreqs"] = projData.getSampleFreqs()
    options["chans"] = []
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["remotestats"] = projData.config.configParams["Statistics"]["remotestats"]
    options["ncores"] = projData.config.getStatisticCores()
    options = parseKeywords(options, kwargs)

    projectText(
        "Calculating stats: {} for sites: {} with remote site {}".format(
            listToString(options["remotestats"]),
            listToString(options["sites"]),
            remoteSite,
        )
    )

    statIO = StatisticIO()
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        measurements = siteData.getMeasurements()

        for meas in measurements:
            sampleFreq = siteData.getMeasurementSampleFreq(meas)
            if sampleFreq not in options["sampleFreqs"]:
                continue

            projectText(
                "Calculating stats for site {}, measurement {} with reference {}".format(
                    site, meas, remoteSite
                )
            )
            # decimation and window parameters
            decParams = getDecimationParameters(sampleFreq, projData.config)
            numLevels = decParams.numLevels
            winParams = getWindowParameters(decParams, projData.config)
            # create the window selector and find the shared windows
            winSelector = getWindowSelector(projData, decParams, winParams)
            winSelector.setSites([site, remoteSite])
            winSelector.calcSharedWindows()
            # create the spectrum reader
            specReader = SpectrumReader(
                os.path.join(siteData.getMeasurementSpecPath(meas), options["specdir"])
            )

            # calculate statistics for decimation level if spectra file exists
            for declevel in range(0, numLevels):
                check = specReader.openBinaryForReading("spectra", declevel)
                if not check:
                    continue
                # information regarding only this spectra file
                refTime = specReader.getReferenceTime()
                winSize = specReader.getWindowSize()
                winOlap = specReader.getWindowOverlap()
                numWindows = specReader.getNumWindows()
                evalFreq = decParams.getEvalFrequenciesForLevel(declevel)
                sampleFreqDec = specReader.getSampleFreq()
                globalOffset = specReader.getGlobalOffset()

                # find the intersection between the windows in this spectra file and the shared windows
                sharedWindows = winSelector.getSharedWindowsLevel(declevel)
                sharedWindowsMeas = sharedWindows.intersection(
                    set(np.arange(globalOffset, globalOffset + numWindows))
                )
                sharedWindowsMeas = sorted(sharedWindowsMeas)
                numSharedWindows = len(sharedWindowsMeas)

                statData = {}
                # create the statistic handlers
                for stat in options["remotestats"]:
                    statElements = getStatElements(stat)
                    statData[stat] = StatisticData(
                        stat, refTime, sampleFreqDec, winSize, winOlap
                    )
                    # with remote reference the number of windows is number of shared windows
                    statData[stat].setStatParams(
                        numSharedWindows, statElements, evalFreq
                    )
                    statData[stat].comments = specReader.getComments()
                    statData[stat].addComment(projData.config.getConfigComment())
                    statData[stat].addComment(
                        "Calculating remote statistic: {}".format(stat)
                    )
                    statData[stat].addComment(
                        "Statistic components: {}".format(listToString(statElements))
                    )

                # collect the spectra data for the site and the matching remote windows
                spectraData, _globalIndices = specReader.readBinaryBatchGlobal(
                    sharedWindowsMeas
                )
                remoteData = []
                for globalWindow in sharedWindowsMeas:
                    _, remoteReader = winSelector.getSpecReaderForWindow(
                        remoteSite, declevel, globalWindow
                    )
                    remoteData.append(remoteReader.readBinaryWindowGlobal(globalWindow))

                # calculate
                if options["ncores"] > 0:
                    out = multiStatistics(
                        options["ncores"],
                        spectraData,
                        evalFreq,
                        options["remotestats"],
                        remoteData=remoteData,
                    )
                    for iW, globalWindow in enumerate(sharedWindowsMeas):
                        for stat in options["remotestats"]:
                            statData[stat].addStat(iW, globalWindow, out[iW][stat])
                else:
                    statCalculator = StatisticCalculator()
                    for iW, globalWindow in enumerate(sharedWindowsMeas):
                        winStatData = calculateWindowStatistics(
                            spectraData[iW],
                            evalFreq,
                            options["remotestats"],
                            remoteSpecData=remoteData[iW],
                            statCalculator=statCalculator,
                        )
                        for stat in options["remotestats"]:
                            statData[stat].addStat(iW, globalWindow, winStatData[stat])
                # release the spectra file of this level before opening the next
                # one, as calculateStatistics does for the single site case
                specReader.closeFile()

                # save statistic
                for stat in options["remotestats"]:
                    statIO.setDatapath(
                        os.path.join(
                            siteData.getMeasurementStatPath(meas), options["specdir"]
                        )
                    )
                    statIO.write(statData[stat], declevel)
def chatterjeeMachlerMod(A, y, **kwargs):
    """Robust regression with leverage-adjusted residuals

    A modification of the Chatterjee-Machler scheme. Rather than using the
    Chatterjee-Machler weights directly, M-estimate weights are calculated
    from residuals which have been divided by (1 - P)^2, where P is the
    normalised diagonal of the projection matrix of A, so that high leverage
    observations have inflated residuals and are downweighted.

    The solution proceeds in two stages of iteratively reweighted least
    squares. The first uses "huber" weights and starts from weights based
    only on leverage. The second starts from "trimmedMean" weights of the
    scaled ordinary least squares residuals and then iterates with the weight
    function named in the options.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term
    maxiter : int, optional
        Maximum number of iterations for each of the two stages
    weights : str, optional
        Name of the weight function used in the second stage

    Returns
    -------
    params : np.ndarray
        The regression parameters
    resids : np.ndarray
        The residuals of the last iteration, divided by the leverage measure and the scale. If the residuals of a weighted solution sum to less than eps, the unscaled residuals of that solution are returned instead.
    weights : np.ndarray
        The weights of the last iteration
    """
    from resistics.common.math import eps
    from resistics.regression.moments import getLocation, getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.robust import defaultOptions, applyWeights
    import numpy.linalg as linalg

    # now calculate p and n
    n = A.shape[0]
    p = A.shape[1]
    pnRatio = 1.0 * p / n

    # diagonal of the projection matrix: the squared norm of each row of Q
    q, _ = linalg.qr(A)
    Pdiag = np.sum(np.absolute(q) ** 2, axis=1)
    del q
    # normalise to below one; the small constant guards against a zero maximum
    Pdiag = Pdiag / (np.max(Pdiag) + 0.0000000001)
    locP = getLocation(Pdiag, "median")
    scaleP = getScale(Pdiag, "mad")
    # observations with leverage far above the median are treated as maximal leverage
    bound = locP + 6 * scaleP
    Pdiag[Pdiag > bound] = 0.99999
    # strictly positive as Pdiag is below one, so dividing by it is safe
    leverageMeas = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration
    # this is purely based on the leverage
    tmp = np.ones(shape=(n), dtype="float") * pnRatio
    tmp = np.maximum(Pdiag, tmp)
    weights = np.reciprocal(tmp)

    # get options
    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    if options["intercept"]:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # first stage: iteratively weighted least squares with huber weights
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew = linalg.lstsq(Anew, ynew, rcond=None)[0]
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, "huber")
        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # there are no previous residuals to compare with after the first pass
        if iteration > 1:
            # check to see whether the change is smaller than the tolerance
            changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
            if changeResids < eps():
                # update resids
                resids = residsNew
                break
        # update resids
        resids = residsNew

    # second stage: start again from the ordinary least squares solution.
    # This is solved directly on A, which already holds any intercept column.
    # olsModel as defined in this file returns a single RegressionData object,
    # so it cannot be unpacked into params and residuals here.
    params = linalg.lstsq(A, y, rcond=None)[0]
    resids = y - np.dot(A, params)
    resids = resids / leverageMeas
    # the scale is carried over from the last iteration of the first stage
    resids = resids / scale
    weights = getWeights(resids, "trimmedMean")
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew = linalg.lstsq(Anew, ynew, rcond=None)[0]
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights

        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, options["weights"])
        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        if changeResids < eps():
            # update resids
            resids = residsNew
            break
        # update resids
        resids = residsNew

    # at the end, return the components
    return params, resids, weights
def viewSpectra(projData: ProjectData, site: str, meas: str, **kwargs) -> Union[Figure, None]:
    """View spectra for a measurement

    Each requested window is plotted as a separate line, with one subplot per
    channel. The legend, which labels the windows by their start and stop
    times, is placed on its own axes to the right of the subplots.

    Parameters
    ----------
    projData : ProjectData
        The project data
    site : str
        The site to view
    meas : str
        The measurement of the site to view
    chans : List[str], optional
        Channels to plot. By default, the channels of the spectra data
    declevel : int, optional
        Decimation level to plot
    plotwindow : int, str, Dict, optional
        Windows to plot (local). If int, the window with local index plotwindow will be plotted. If string and "all", all the windows will be plotted if there are 20 or fewer windows, otherwise 20 windows throughout the whole spectra dataset will be plotted. If a dictionary, needs to have start and stop to define a range (stop is included). By default, the first window is plotted.
    specdir : str, optional
        String that specifies spectra directory for the measurement
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. If no data was found, then None is returned.
    """
    from resistics.common.plot import savePlot, plotOptionsSpec, colorbarMultiline

    options = {}
    options["chans"] = []
    options["declevel"] = 0
    options["plotwindow"] = [0]
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText("Plotting spectra for measurement {} and site {}".format(meas, site))
    specReader = getSpecReader(projData, site, meas, **options)
    if specReader is None:
        return None

    # channels
    dataChans = specReader.getChannels()
    if len(options["chans"]) > 0:
        dataChans = options["chans"]
    numChans = len(dataChans)

    # get windows
    numWindows = specReader.getNumWindows()
    sampleFreqDec = specReader.getSampleFreq()

    # turn the plotwindow option into a list of local window indices
    windows = options["plotwindow"]
    if isinstance(windows, str) and windows == "all":
        if numWindows > 20:
            # too many to plot them all, take 20 spread through the dataset
            windows = list(
                np.linspace(0, numWindows, 20, endpoint=False, dtype=np.int32)
            )
        else:
            windows = list(np.arange(0, numWindows))
    elif isinstance(windows, int):
        windows = [windows]  # if an integer, make it into a list
    elif isinstance(windows, dict):
        windows = list(np.arange(windows["start"], windows["stop"] + 1))

    # create a figure
    plotfonts = options["plotoptions"]["plotfonts"]
    cmap = colorbarMultiline()
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    for iW in windows:
        # stop at the first window index beyond the data
        if iW >= numWindows:
            break
        color = cmap(iW / numWindows)
        winData = specReader.readBinaryWindowLocal(iW)
        winData.view(
            fig=fig,
            chans=dataChans,
            label="{} to {}".format(
                winData.startTime.strftime("%m-%d %H:%M:%S"),
                winData.stopTime.strftime("%m-%d %H:%M:%S"),
            ),
            plotfonts=plotfonts,
            color=color,
        )

    st = fig.suptitle(
        "Spectra plot, site = {}, meas = {}, fs = {:.2f} [Hz], decimation level = {:2d}".format(
            site, meas, sampleFreqDec, options["declevel"]
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # put on axis labels etc
    for idx, chan in enumerate(dataChans):
        ax = plt.subplot(numChans, 1, idx + 1)
        plt.title("Amplitude {}".format(chan), fontsize=plotfonts["title"])
        if len(options["plotoptions"]["amplim"]) == 2:
            ax.set_ylim(options["plotoptions"]["amplim"])
        ax.set_xlim(0, specReader.getSampleFreq() / 2.0)
        plt.grid(True)

    # fig legend and formatting
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    fig.tight_layout(rect=[0.02, 0.02, 0.77, 0.92])
    # legend axis
    legax = plt.axes(position=[0.77, 0.02, 0.23, 0.88], in_layout=False)
    # the legend axes only hold the legend, so hide all ticks and tick labels;
    # labelbottom must be the boolean False as the string "False" is truthy
    plt.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
    plt.box(False)
    legax.legend(handles, labels, loc="upper left", fontsize=plotfonts["legend"])

    # plot show and save
    if options["save"]:
        impath = projData.imagePath
        filename = "spectraData_{}_{}_dec{}_{}".format(
            site, meas, options["declevel"], options["specdir"]
        )
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        # saved but never displayed: release the figure and signal that with None
        plt.close(fig)
        return None
    return fig