def setUp(self):
    """Prepare three-column, two-column and REE synthetic frames."""
    components = ["MgO", "SiO2", "CaO"]
    self.cols = components
    # Bivariate random data can run into interesting singular matrix errors;
    # the two-column frame here is a slice of the three-column one.
    ternary = normal_frame(columns=components, size=100)
    self.tridf = ternary
    self.bidf = ternary.loc[:, components[:2]]
    self.multidf = normal_frame(columns=REE(), size=100)
def setUp(self):
    """Create a five-oxide synthetic frame and blank out some entries with NaN."""
    self.cols = ["SiO2", "CaO", "MgO", "FeO", "TiO2"]
    self.df = normal_frame(columns=self.cols)
    n_missing = 10
    # Positions are drawn from 1 upwards on both axes, so position 0 on
    # either axis is never selected. Row positions are drawn before column
    # positions.
    row_positions = np.random.randint(1, 10, size=n_missing)
    col_positions = np.random.randint(1, len(self.cols), size=n_missing)
    self.df.iloc[row_positions, col_positions] = np.nan
def setUp(self):
    """Create a synthetic oxide frame and append a summed "Na2O + K2O" column."""
    means = {"SiO2": 0.5, "Na2O": 0.04, "K2O": 0.05, "Al2O3": 0.4}
    self.df = normal_frame(
        columns=list(means), mean=list(means.values()), size=100
    )
    alkali_total = self.df.Na2O + self.df.K2O
    self.df.loc[:, "Na2O + K2O"] = alkali_total
def setUp(self):
    """Fit an SVC pipeline on the iris dataset and keep its best estimator."""
    synthetic = normal_frame(size=20)
    self.X = synthetic.apply(close, axis=1)
    dataset = sklearn.datasets.load_iris()
    self.data = dataset["data"]
    self.target = dataset["target"]
    pipeline = SVC_pipeline(probability=True)
    fitted = pipeline.fit(self.data, self.target)
    self.gs = fitted
    self.clf = fitted.best_estimator_
def test_LambdaTransformer(self):
    """Check that LambdaTransformer.transform runs with and without Eu."""
    closed = normal_frame(columns=REE()).apply(close, axis=1)
    transformer = LambdaTransformer()
    every_ree = REE()
    ree_without_eu = [el for el in REE() if el not in ["Eu"]]
    for ree in (every_ree, ree_without_eu):
        with self.subTest(ree=ree):
            # Only successful execution is exercised; the result is not inspected.
            transformer.transform(closed.loc[:, ree])
def setUp(self):
    """Fit the test classifier on a stratified split of two-class synthetic data."""
    self.X = normal_frame(size=20).apply(close, axis=1)
    self.gs = test_classifier()
    # The first four samples are labelled 1 and every later sample 2.
    labels = np.ones(self.X.index.size)
    labels[4:] += 1
    self.y = labels
    split = train_test_split(self.X, labels, test_size=0.3, stratify=labels)
    self.X_train, self.X_test, self.y_train, self.y_test = split
    self.gs.fit(self.X_train, self.y_train)
    self.clf = self.gs.best_estimator_
def _get_spatiotemporal_dataframe(size, geochem_columns=None):
    """
    Build a random dataframe of locations and ages for resampling tests.

    Parameters
    ----------
    size : int
        Number of rows to generate.
    geochem_columns : list, optional
        Names of additional columns to populate from ``normal_frame``. When
        omitted (or empty) only the location and age columns are returned.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``range(size)`` with the columns "Latitude",
        "Longitude", "Age", "AgeUncertainty", "MinAge" and "MaxAge", followed
        by any requested `geochem_columns`.
    """
    # `None` is used as the default instead of a mutable `[]`, so no list
    # object is shared between calls; the truthiness test below treats both
    # the same way.
    df = pd.DataFrame(
        index=range(size),
        columns=[
            "Latitude",
            "Longitude",
            "Age",
            "AgeUncertainty",
            "MinAge",
            "MaxAge",
        ],
    )
    df["Latitude"] = 20 + np.random.randn(size)
    df["Longitude"] = 85 + np.random.randn(size)
    df["Age"] = 100 + np.random.randint(0, 100, size)
    df["AgeUncertainty"] = 2 + np.random.randn(size)
    # Age bounds are a fixed +/- 2 around the age; they do not use the random
    # AgeUncertainty column.
    df["MinAge"] = df["Age"] - 2
    df["MaxAge"] = df["Age"] + 2
    if geochem_columns:
        df[geochem_columns] = normal_frame(columns=geochem_columns, size=size)
    return df
def setUp(self):
    """Create a renormalised four-sample frame of mixed chemistry columns plus REE."""
    named_columns = [
        "MgO",
        "SiO2",
        "CaO",
        "FeO",
        "Ti",
        "Hf",
        "Zr",
        "H2O",
        "Sr87_Sr86",
        "87Sr/86Sr",
        "87Sr/86Sri",
    ]
    columns = named_columns + pyrolite.geochem.REE()
    raw = normal_frame(size=4, columns=columns)
    self.df = renormalise(raw)
def setUp(self):
    """Create a row-wise `close`d frame and an integer grouping based on MgO."""
    frame = normal_frame().apply(close, axis=1)
    self.df = frame
    # 1 where MgO exceeds 0.21, otherwise 0.
    above_threshold = frame["MgO"] > 0.21
    self.group = above_threshold.apply(int)
def setUp(self):
    """Store the values of a three-column synthetic frame (size=20)."""
    frame = normal_frame(columns=["SiO2", "CaO", "MgO"], size=20)
    self.data = frame.values
def setUp(self):
    """Create a default synthetic frame with `close` applied to each row."""
    raw = normal_frame()
    self.df = raw.apply(close, axis=1)
def setUp(self):
    """Create one default synthetic series and one default synthetic frame."""
    # The series is generated before the frame, as the right-hand side is
    # evaluated left to right.
    self.ser, self.df = normal_series(), normal_frame()
these are complementary and the sine method is used here (where angles close to :math:`\pi / 2` represent close to zero abundance, and angles closer to zero/aligning with the respective component axis represent higher abundances). See below for an example of this demonstrated graphically. First let's create some example mineralogical abundance data, where at least one of the minerals might occasionally have zero abundance: """ import numpy as np from pyrolite.util.synthetic import normal_frame # sphinx_gallery_thumbnail_number = 2 comp = normal_frame( columns=["Ab", "Qtz", "Ms", "Sch"], mean=[0.5, 1, 0.3, 0.05], cov=np.eye(3) * np.array([0.02, 0.5, 0.5]), size=10000, seed=145, ) comp += 0.05 * np.random.randn(*comp.shape) comp[comp <= 0] = 0 comp = comp.pyrocomp.renormalise(scale=1) ################################################################################ # We can quickly visualise this to see that it does indeed have some true zeros: # import matplotlib.pyplot as plt comp[["Qtz", "Ms", "Sch"]].pyroplot.scatter(alpha=0.05, c="k") plt.show() ################################################################################
def setUp(self):
    """Create a five-oxide synthetic frame (size=100) with the given `mean` values."""
    means = {"SiO2": 0.5, "Na2O": 0.04, "K2O": 0.05, "Al2O3": 0.2, "CaO": 0.3}
    self.df = normal_frame(
        columns=list(means), mean=list(means.values()), size=100
    )
def setUp(self):
    """Create a Quartz / Alkali Feldspar / Plagioclase synthetic frame (size=100)."""
    means = {"Quartz": 0.5, "Alkali Feldspar": 0.05, "Plagioclase": 0.45}
    self.df = normal_frame(
        columns=list(means), mean=list(means.values()), size=100
    )
def setUp(self):
    """Create a synthetic frame using the default `normal_frame` arguments."""
    frame = normal_frame()
    self.df = frame
def setUp(self):
    """Create a five-oxide synthetic frame and record its column count and size."""
    oxides = ["SiO2", "CaO", "MgO", "FeO", "TiO2"]
    n_samples = 10
    self.cols = oxides
    self.d = len(oxides)
    self.n = n_samples
    self.df = normal_frame(columns=oxides, size=n_samples)
def setUp(self):
    """Create a thirteen-column oxide frame and record the handler name."""
    oxides = [
        "SiO2",
        "TiO2",
        "Al2O3",
        "Fe2O3",
        "FeO",
        "MnO",
        "MgO",
        "CaO",
        "Na2O",
        "K2O",
        "P2O5",
        "CO2",
        "SO3",
    ]
    self.df = normal_frame(columns=oxides)
    # NOTE(review): presumably the logger name checked by the tests - confirm.
    self.handler = "pyrolite.mineral.normative"
def setUp(self):
    """Create a four-column oxide frame and record the handler name."""
    oxides = ["SiO2", "Fe2O3", "FeO", "MnO"]
    self.df = normal_frame(columns=oxides)
    # NOTE(review): presumably the logger name checked by the tests - confirm.
    self.handler = "pyrolite.mineral.normative"
the compositional space. The commonly used log-transformations include the Additive Log-Ratio (:func:`~pyrolite.comp.pyrocomp.ALR`), Centred Log-Ratio (:func:`~pyrolite.comp.pyrocomp.CLR`), and Isometric Log-Ratio (:func:`~pyrolite.comp.pyrocomp.ILR`) [#ref_1]_ [#ref_2]_. This example will show you how to access and use some of these functions in pyrolite. """ ######################################################################################## # First let's create some example data: # from pyrolite.util.synthetic import normal_frame, random_cov_matrix df = normal_frame( size=100, cov=random_cov_matrix(sigmas=[0.1, 0.05, 0.3, 0.6], dim=4, seed=32), seed=32, ) df.describe() ######################################################################################## # Let's have a look at some of the log-transforms, which can be accessed directly from # your dataframes (via :class:`pyrolite.comp.pyrocomp`), after you've imported # :mod:`pyrolite.comp`. Note that the transformations will return *new* dataframes, # rather than modify their inputs. For example: # import pyrolite.comp lr_df = df.pyrocomp.CLR() # using a centred log-ratio transformation ######################################################################################## # The transformations are implemented such that the column names generally make it # evident which transformations have been applied (here using default simple labelling;
def setUp(self):
    """Create a default synthetic frame renormalised via the `pyrocomp` accessor."""
    raw = normal_frame()
    self.df = raw.pyrocomp.renormalise()
.. note:: This tutorial is a work in progress and will be gradually updated. In this tutorial we will illustrate some straightforward formatting for your plots which will allow for greater customisation as needed. As :mod:`pyrolite` heavily uses and exposes the API of :mod:`matplotlib` for the visualisation components (and also :mod:`mpltern` for ternary diagrams), you should also check out their documentation pages for more in-depth guides, examples and API documentation. """ ####################################################################################### # First let's pull in a simple dataset to use throughout these examples: # from pyrolite.util.synthetic import normal_frame # sphinx_gallery_thumbnail_number = 7 df = normal_frame( columns=["SiO2", "CaO", "MgO", "Al2O3", "TiO2", "27Al", "d11B"]) ####################################################################################### # Basic Figure and Axes Settings # ------------------------------ # # :mod:`matplotlib` makes it relatively straightforward to customise most settings for # your figures and axes. These settings can be defined at creation (e.g. in a call to # :func:`~matplotlib.pyplot.subplots`), or they can be defined after you've created an # axis (with the methods :code:`ax.set_<parameter>()`). For example: # import matplotlib.pyplot as plt fig, ax = plt.subplots(1) ax.set_xlabel("My X Axis Label") ax.set_title("My Axis Title", fontsize=12)
def setUp(self):
    """Create a Ca / Na / K synthetic frame (size=100) with the given `mean` values."""
    means = {"Ca": 0.5, "Na": 0.05, "K": 0.45}
    self.df = normal_frame(
        columns=list(means), mean=list(means.values()), size=100
    )
def setUp(self):
    """Create a default synthetic frame with every value converted to `str`."""
    numeric = normal_frame()
    self.df = numeric.applymap(str)
import unittest import pandas as pd import numpy as np import matplotlib.pyplot as plt from pyrolite.util.resampling import ( _segmented_univariate_distance_matrix, univariate_distance_matrix, get_spatiotemporal_resampling_weights, add_age_noise, spatiotemporal_bootstrap_resample, ) from pyrolite.util.spatial import great_circle_distance from pyrolite.util.synthetic import normal_frame df = normal_frame() sample = df.sample(frac=1, replace=True) dir(df.index) df.index.take(sample.index) df.index.intersection(sample.index) def _get_spatiotemporal_dataframe(size, geochem_columns=[]): df = pd.DataFrame( index=range(size), columns=["Latitude", "Longitude", "Age", "AgeUncertainty", "MinAge", "MaxAge",], ) df["Latitude"] = 20 + np.random.randn(size) df["Longitude"] = 85 + np.random.randn(size) df["Age"] = 100 + np.random.randint(0, 100, size)
def setUp(self):
    """Create a synthetic frame with five named oxide columns."""
    oxides = ["SiO2", "CaO", "MgO", "FeO", "TiO2"]
    self.cols = oxides
    self.df = normal_frame(columns=oxides)
def setUp(self):
    """Create a Sand / Clay / Silt synthetic frame (size=100) with the given `mean` values."""
    means = {"Sand": 0.5, "Clay": 0.05, "Silt": 0.45}
    self.df = normal_frame(
        columns=list(means), mean=list(means.values()), size=100
    )
One of pyrolite's strengths is converting mixed elemental and oxide data to a new form. The simplest way to perform this is by using the :func:`~pyrolite.geochem.transform.convert_chemistry` function. Note that by default pyrolite assumes that data are in the same units. """ import pyrolite.geochem import pandas as pd pd.set_option("precision", 3) # smaller outputs ######################################################################################## # Here we create some synthetic data to work with, which has some variables in Wt% and # some in ppm. Notably some elements are present in more than one column (Ca, Na): # from pyrolite.util.synthetic import normal_frame df = normal_frame( columns=["MgO", "SiO2", "FeO", "CaO", "Na2O", "Te", "K", "Na"]) * 100 df.pyrochem.elements *= 100 # elements in ppm ######################################################################################## df.head(2) ######################################################################################## # As the units are heterogeneous, we'll need to convert the data frame to a single set of # units (here we use Wt%): # df.pyrochem.elements = df.pyrochem.elements.pyrochem.scale("ppm", "wt%") # ppm to wt% ######################################################################################## # We can transform this chemical data to a new set of compositional variables. # Here we i) convert CaO to Ca, ii) aggregate Na2O and Na to Na and iii) calculate # mass ratios for Na/Te and MgO/SiO2. # Note that you can also use this function to calculate mass ratios: #
def setUp(self):
    """Create a five-oxide synthetic frame with each row divided by its own sum."""

    def _divide_by_sum(x):
        # Scale one row so that its entries add up to one.
        return x / np.sum(x)

    self.cols = ["SiO2", "CaO", "MgO", "FeO", "TiO2"]
    raw = normal_frame(columns=self.cols)
    self.df = raw.apply(_divide_by_sum, axis="columns")
def setUp(self):
    """Create a base frame and a list of four further frames."""
    self.df0 = normal_frame()
    # One frame is generated and referenced four times: the four list entries
    # are the same object, not independent draws.
    shared = normal_frame()
    self.others = [shared for _ in range(4)]