def test_compare_scikit_learn():
    """Compare PCA against scikit-learn's PCA on two inputs.

    Singular values and explained-variance ratios are checked; the
    scikit-learn ratios are multiplied by 100 before the comparison.
    """
    # --- small hand-written matrix, two components
    toy = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    reference = sklPCA(n_components=2)
    reference.fit(toy)

    model = PCA(NDDataset(toy))
    model.printev(n_pc=2)

    assert_array_almost_equal(model.sv.data, reference.singular_values_)
    assert_array_almost_equal(
        model.ev_ratio.data,
        reference.explained_variance_ratio_ * 100.,
    )

    # --- IR spectra, first five components compared to 4 decimals
    spectra = NDDataset.read('irdata/nh4y-activation.spg').data
    reference = sklPCA(n_components=5)
    reference.fit(spectra)

    # PCA is given its own copy of the array that scikit-learn was fitted on
    model = PCA(NDDataset(spectra.copy()))
    model.printev(n_pc=5)

    assert_array_almost_equal(
        model.sv.data[:5],
        reference.singular_values_[:5],
        4,
    )
    assert_array_almost_equal(
        model.ev_ratio.data[:5],
        reference.explained_variance_ratio_[:5] * 100.,
        4,
    )
def coverages_vs_time(surface, t, returnNDDataset=False):
    """
    Return the surface coverages at time(s) t.

    Each time is integrated starting from the coverages the surface had when
    the function was called; those coverages are restored before returning.

    Parameters
    ----------
    surface : cantera.composite.Interface
        Surface whose coverages are advanced.
    t : iterable or spectrochempy.Coord
        Times at which the coverages must be computed.
    returnNDDataset : bool, default: False
        If True return the coverage matrix as a NDDataset, else as a
        np.ndarray.

    Returns
    -------
    np.ndarray or NDDataset
        One row per time, one column per entry of ``surface.coverages``.

    Raises
    ------
    SpectroChemPyException
        If Cantera is not available.
    """
    if not HAS_CANTERA:
        raise SpectroChemPyException(
            'Cantera is not available : please install it before continuing: \n'
            'conda install -c cantera cantera')

    # isinstance (rather than an exact type test) also unwraps Coord
    # subclasses; unwrapping first makes len() and the loop use the same data.
    if isinstance(t, Coord):
        t = t.data

    init_coverages = surface.coverages
    coverages = np.zeros((len(t), surface.coverages.shape[0]))

    for i, ti in enumerate(t):
        # restart from the initial state for every requested time
        surface.coverages = init_coverages
        surface.advance_coverages(ti)
        coverages[i, :] = surface.coverages

    # leave the surface as it was found
    surface.coverages = init_coverages

    if returnNDDataset:
        coverages = NDDataset(coverages)
        coverages.y = Coord(t, title='time')
        coverages.x.title = 'coverage / -'
        coverages.x.labels = surface.species_names

    return coverages
def test_read_zip():
    """Read OMNIC spectra from a zip archive given by name or as bytes."""
    # keyword arguments shared by every read_zip call below
    options = dict(origin="omnic", only=10, csv_delimiter=";", merge=True)

    from_name = NDDataset.read_zip("agirdata/P350/FTIR/FTIR.zip", **options)
    assert from_name.shape == (10, 2843)

    # Test bytes contents for ZIP files
    archive = DATADIR / "agirdata" / "P350" / "FTIR" / "FTIR.zip"
    payload = archive.read_bytes()
    from_bytes = NDDataset.read_zip({"name.zip": payload}, **options)
    assert from_bytes.shape == (10, 2843)

    # Test read_zip with several contents
    several = {"name1.zip": payload, "name2.zip": payload}
    from_several = NDDataset.read_zip(several, **options)
    assert from_several.shape == (20, 2843)
def coverages_vs_time(surface, t, returnNDDataset=False):
    """
    Compute the surface coverages reached at each requested time.

    Every time is integrated from the coverages the surface had on entry,
    and those coverages are put back before returning.

    Parameters
    ----------
    surface : cantera.composite.Interface
        Surface whose coverages are advanced.
    t : iterable or spectrochempy.Coord
        Times at which the coverages must be computed.
    returnNDDataset : bool, default: False
        If True returns the coverage matrix as a NDDataset, else as a
        np.ndarray.

    Returns
    -------
    np.ndarray or NDDataset
        One row per time, one column per entry of ``surface.coverages``.
    """
    start = surface.coverages
    result = np.zeros((len(t), surface.coverages.shape[0]))

    times = t.data if isinstance(t, Coord) else t

    for row, instant in enumerate(times):
        # always integrate from the same starting state
        surface.coverages = start
        surface.advance_coverages(instant)
        result[row, :] = surface.coverages

    surface.coverages = start

    if not returnNDDataset:
        return result

    dataset = NDDataset(result)
    dataset.y = Coord(times, title="time")
    dataset.x.title = "coverage / -"
    dataset.x.labels = surface.species_names
    return dataset
def test_nddataset_subtract_mismatch_units():
    """In-place subtraction with incompatible units raises DimensionalityError."""
    lengths = NDDataset(np.ones((5, 5)), units='m')
    speeds = NDDataset(np.ones((5, 5)) * 2., units='m/s')

    expected = (
        "Cannot convert from '[length] / [time]' to '[length]', "
        "Units must be compatible for the `isub` operator"
    )

    with pytest.raises(DimensionalityError) as exc:
        lengths -= speeds

    assert str(exc.value) == expected
def test_read_csv():
    """Read CSV files given by name, as Path objects and as raw bytes."""
    data_root = prefs.datadir
    prefs.csv_delimiter = ','

    tga = NDDataset.read_csv(
        'agirdata/P350/TGA/tg.csv',
        directory=data_root,
        origin='tga',
    )
    assert tga.shape == (1, 3247)

    omnic = NDDataset.read_csv('irdata/IR.CSV', origin='omnic')
    assert omnic.shape == (1, 3736)

    # without directory
    plain = NDDataset.read_csv('irdata/IR.CSV')
    assert plain.shape == (1, 3736)

    # pathlib.Path objects can be used instead of string for filenames
    csv_path = Path(data_root) / 'irdata' / 'IR.CSV'
    from_path = scp.read_csv(csv_path)
    assert from_path == plain

    # Read CSV content
    raw = csv_path.read_bytes()
    from_bytes = scp.read_csv({'somename.csv': raw})
    assert from_bytes == plain
def simple_project():
    """Build a project with three sub-projects, two datasets and a script.

    Returns
    -------
    Project
        The main project named 'project_1'.
    """
    # (name, label) of each sub-project, in insertion order
    samples = (
        ('P350', r'$\mathrm{M_P}\,(623\,K)$'),
        ('A350', r'$\mathrm{M_A}\,(623\,K)$'),
        ('B350', r'$\mathrm{M_B}\,(623\,K)$'),
    )
    proj = Project(
        *(Project(name=name, label=label) for name, label in samples),
        # attributes
        name='project_1',
        label='main project',
    )
    assert proj.projects_names == ['P350', 'A350', 'B350']

    # two small datasets stored in the A350 sub-project
    infrared = NDDataset([1.1, 2.2, 3.3], coordset=[[1, 2, 3]])
    thermo = NDDataset([1, 3, 4], coordset=[[1, 2, 3]])
    proj.A350['IR'] = infrared
    proj.A350['TG'] = thermo

    # the script text refers to a variable called `proj`
    script_source = (
        'set_loglevel(INFO)\n'
        'info_(f"samples contained in the project are {proj.projects_names}")'
    )
    proj['print_info'] = Script('print_info', script_source)

    return proj
def reconstruct(self):
    r"""
    Rebuild the data in the original space from the computed distributions.

    The matrix product :math:`\hat{X} = K.f[i]` is evaluated for each value
    of the regularization parameter.

    Returns
    -------
    X_hat : |NDDataset|
        The reconstructed dataset.
    """
    # TODO: adapt for non-regularized / 1D IRIS
    only_zero_lambda = len(self.lamda) == 1 and self.lamda == [0]

    if only_zero_lambda:
        # single entry: the leading axis of the template is dropped
        template = np.zeros((self.f.z.size, *self.X.shape)).squeeze(axis=0)
        X_hat = NDDataset(template, title=self.X.title, units=self.X.units)
        X_hat.set_coordset(y=self.X.y, x=self.X.x)
        X_hat.data = np.dot(self.K.data, self.f.data.squeeze())
    else:
        # one reconstructed dataset per entry along z
        template = np.zeros((self.f.z.size, *self.X.shape))
        X_hat = NDDataset(template, title=self.X.title, units=self.X.units)
        # TODO: take into account the fact that coordinates
        #  may have other names
        X_hat.set_coordset(z=self.f.z, y=self.X.y, x=self.X.x)
        for index in range(X_hat.z.size):
            X_hat[index].data = np.dot(self.K.data,
                                       self.f[index].data.squeeze())

    X_hat.name = '2D-IRIS Reconstructed datasets'
    return X_hat
def test_nddataset_subtract_mismatch_shape():
    """In-place subtraction of datasets with different shapes must fail."""
    small = NDDataset(np.ones((5, 5)))
    large = NDDataset(np.ones((6, 6)) * 2.0)

    with pytest.raises(ArithmeticError) as exc:
        small -= large

    message = exc.value.args[0]
    assert message.startswith("operands could not be broadcast together")
def test_read_omnic_contents():
    """Read OMNIC files supplied as ``{filename: bytes}`` mappings."""

    def raw_bytes(*relative_parts):
        # binary content of a file located under the configured data directory
        with open(os.path.join(prefs.datadir, *relative_parts), 'rb') as stream:
            return stream.read()

    spg_name = 'wodger.spg'
    spa_name = '7_CZ0-100 Pd_101.SPA'
    spa_location = ('irdata', 'subdir', spa_name)

    # test read_omnic with byte spg content
    spg_bytes = raw_bytes(spg_name)
    from_name = scp.read_omnic(spg_name)
    from_bytes = scp.read_omnic({spg_name: spg_bytes})
    assert from_name == from_bytes

    # Test bytes contents for spa files
    spa_bytes = raw_bytes(*spa_location)
    single = NDDataset.read_omnic({spa_name: spa_bytes})
    assert single.shape == (1, 5549)

    # test read_omnic with several contents
    first = raw_bytes(*spa_location)
    second = raw_bytes(spg_name)
    merged = NDDataset.read_omnic({spa_name: first, spg_name: second},
                                  merge=True)
    assert merged.shape == (3, 5549)
def test_nddataset_fancy_indexing():
    """Indexing a NDDataset must select the same items as the numpy array."""
    # numpy vs dataset
    generator = np.random.RandomState(42)
    values = generator.randint(100, size=10)
    wrapped = NDDataset(values)

    # single value indexing
    picked = (wrapped[3].data, wrapped[7].data, wrapped[2].data)
    assert picked == (values[3], values[7], values[2])

    # slice indexing
    assert_array_equal(wrapped[3:7].data, values[3:7])

    # boolean indexing
    above = values > 52
    assert_array_equal(wrapped[above].data, values[above])

    # fancy indexing, with a list and then with a 2D integer array
    for selector in ([3, 7, 4], np.array([[3, 7], [4, 5]])):
        assert_array_equal(wrapped[selector].data, values[selector])

    with RandomSeedContext(1234):
        table = np.random.random((3, 5)).round(1)
    axes = (np.arange(3), np.arange(5))
    nd = NDDataset(table, coordset=axes)

    # only checks that these selections can be made; results are not inspected
    _ = nd[[1, 0, 2]]
    _ = nd[np.array([1, 0])]
def test_nddataset_add():
    """Negate a named dataset, then add a scaled dataset to another one."""
    first = NDDataset(np.ones((5, 5)), name="d1")
    second = NDDataset(np.ones((5, 5)), name="d2")

    negated = -first
    # NOTE(review): this compares a name with the dataset object itself;
    # presumably ``first.name`` was intended -- confirm before changing.
    assert negated.name != first

    combined = first * 0.5 + second
    assert isinstance(combined, NDDataset)
    assert np.all(combined.data == 1.5)
def test_arithmetic_unit_calculation(unit1, unit2, op, result_units):
    """Check the units obtained when the method named `op` combines two datasets.

    Parameters
    ----------
    unit1, unit2
        Units given to the left and right operand.
    op : str
        Name of the method looked up on the left operand.
    result_units
        Units expected on the result.
    """
    left = NDDataset(np.array([1]), units=unit1)
    right = NDDataset(np.array([1]), units=unit2)

    operation = left.__getattribute__(op)
    outcome = operation(right)

    try:
        assert outcome.units == result_units
    except AssertionError:
        # plain equality failed: recompute the result and let
        # assert_equal_units decide
        assert_equal_units(operation(right).units, result_units)
def test_ndmath_absolute_of_quaternion():
    """np.abs must accept a quaternion dataset that carries coordinates."""
    raw = np.array([
        [1., 2., 2., 0., 0., 0.],
        [1.3, 2., 2., 0.5, 1., 1.],
        [1, 4.2, 2., 3., 2., 2.],
        [5., 4.2, 2., 3., 3., 3.],
    ])
    nd = NDDataset(raw, dtype=quaternion)

    axes = CoordSet(np.linspace(-1, 1, 2), np.linspace(-10., 10., 3))

    # the 4x6 input is exposed as a 2x3 dataset once typed as quaternion
    assert nd.shape == (2, 3)

    nd.set_coordset(**axes)

    # only checks that the call succeeds; the result is not inspected
    np.abs(nd)
def test_project_with_script():
    """Save, reload and run a script stored in a project.

    The script text refers to a variable named ``proj``; the calls below
    hand ``locals()`` to the script, so the local names of this function
    are part of what is being tested and must not be renamed.
    """
    # Example from tutorial agir notebook
    proj = Project(
        Project(name="P350", label=r"$\mathrm{M_P}\,(623\,K)$"),
        Project(name="A350", label=r"$\mathrm{M_A}\,(623\,K)$"),
        Project(name="B350", label=r"$\mathrm{M_B}\,(623\,K)$"),
        name="HIZECOKE_TEST",
    )
    assert proj.projects_names == ["P350", "A350", "B350"]

    # add one dataset to a subproject and one to the main project
    ir = NDDataset([1, 2, 3])
    tg = NDDataset([1, 3, 4])
    proj.A350["IR"] = ir
    proj["TG"] = tg

    print(proj.A350)
    print(proj)
    print(proj.A350.label)

    # the returned value is not used afterwards
    f = proj.save()

    # a reloaded project must print the same and keep the sub-project label
    newproj = Project.load("HIZECOKE_TEST")
    # print(newproj)
    assert str(newproj) == str(proj)
    assert newproj.A350.label == proj.A350.label

    # proj = Project.load('HIZECOKE')
    # assert proj.projects_names == ['A350', 'B350', 'P350']

    # source of the script; it reads `proj` when it is executed
    script_source = ("set_loglevel(INFO)\n"
                     'info_("samples contained in the project are : '
                     '%s"%proj.projects_names)')
    proj["print_info"] = Script("print_info", script_source)
    # print(proj)

    # save but do not change the original data
    proj.save_as("HIZECOKE_TEST", overwrite_data=False)
    newproj = Project.load("HIZECOKE_TEST")

    # execute the script in three ways, passing the local namespace explicitly
    run_script(newproj.print_info, locals())
    newproj.print_info.execute(locals())
    newproj.print_info(locals())

    # attempts to resolve locals
    newproj.print_info()

    proj.save_as("HIZECOKE_TEST")
    newproj = Project.load("HIZECOKE_TEST")
def test_nmr_reader_2D():
    """Read a 2D TopSpin experiment and check its text and HTML summaries."""
    topspin_dir = os.path.join(
        prefs.datadir, 'nmrdata', 'bruker', 'tests', 'nmr', 'topspin_2d'
    )

    # load the data in a new dataset
    ndd = NDDataset()
    ndd.read_topspin(topspin_dir, expno=1, remove_digital_filter=True)

    expected_summary = "NDDataset: [quaternion] unitless (shape: (y:96, x:948))"
    assert ndd.__str__() == expected_summary

    html = ndd._repr_html_()
    expected_fragment = (
        "<tr><td style='padding-right:5px; padding-bottom:0px; padding-top:0px;"
    )
    assert expected_fragment in html
def test_ndio_2D(IR_dataset_2D):
    """Save a copy of a 2D dataset and reload it from the data directory.

    Loading by bare name is expected to raise FileNotFoundError, while
    loading with the path under ``prefs.datadir`` must succeed.

    Parameters
    ----------
    IR_dataset_2D
        Dataset supplied by the fixture of the same name.
    """
    # test with a 2D
    ir2 = IR_dataset_2D.copy()
    f = ir2.save_as("essai2D", confirm=False)

    try:
        assert ir2.directory == irdatadir

        with pytest.raises(FileNotFoundError):
            NDDataset.load("essai2D")

        nd = NDDataset.load(prefs.datadir / "irdata/essai2D")
        assert nd.directory == irdatadir
    finally:
        # remove the saved file even when one of the checks above fails,
        # so a failing run does not leave it in the data directory
        f.unlink()
def test_project_with_script():
    """Save, reload and run a script stored in a project.

    The script text refers to a variable named ``proj``; the calls below
    hand ``locals()`` to the script, so the local names of this function
    are part of what is being tested and must not be renamed.
    """
    # Example from tutorial agir notebook
    proj = Project(Project(name='P350', label=r'$\mathrm{M_P}\,(623\,K)$'),
                   Project(name='A350', label=r'$\mathrm{M_A}\,(623\,K)$'),
                   Project(name='B350', label=r'$\mathrm{M_B}\,(623\,K)$'),
                   name='HIZECOKE_TEST')
    assert proj.projects_names == ['P350', 'A350', 'B350']

    # add one dataset to a subproject and one to the main project
    ir = NDDataset([1, 2, 3])
    tg = NDDataset([1, 3, 4])
    proj.A350['IR'] = ir
    proj['TG'] = tg

    print(proj.A350)
    print(proj)
    print(proj.A350.label)

    # the returned value is not used afterwards
    f = proj.save()

    # a reloaded project must print the same and keep the sub-project label
    newproj = Project.load('HIZECOKE_TEST')
    # print(newproj)
    assert str(newproj) == str(proj)
    assert newproj.A350.label == proj.A350.label

    # proj = Project.load('HIZECOKE')
    # assert proj.projects_names == ['A350', 'B350', 'P350']

    # source of the script; it reads `proj` when it is executed
    script_source = 'set_loglevel(INFO)\n' \
                    'info_("samples contained in the project are : ' \
                    '%s"%proj.projects_names)'
    proj['print_info'] = Script('print_info', script_source)
    # print(proj)

    # save but do not change the original data
    proj.save_as('HIZECOKE_TEST', overwrite_data=False)
    newproj = Project.load('HIZECOKE_TEST')

    # execute the script in three ways, passing the local namespace explicitly
    run_script(newproj.print_info, locals())
    newproj.print_info.execute(locals())
    newproj.print_info(locals())

    # attempts to resolve locals
    newproj.print_info()

    proj.save_as('HIZECOKE_TEST')
    newproj = Project.load('HIZECOKE_TEST')
def composition_vs_time(self, time, returnNDDataset=True):
    """
    Advance through the given times and record the composition at each one.

    Parameters
    ----------
    time : iterable or Coord
        Times passed one after the other to ``self.advance``. A Coord is
        replaced by its ``data``.
    returnNDDataset : bool, default: True
        If True both results are wrapped in labelled NDDataset objects,
        otherwise they are left as np.ndarray.

    Returns
    -------
    dict
        ``{"X": ..., "coverages": ...}`` with shapes
        ``(len(time), n_cstr, n_gas_species)`` and
        ``(len(time), n_cstr, n_surface_species)``.
    """
    times = time.data if isinstance(time, Coord) else time

    mol_fractions = np.zeros((len(times), self.n_cstr, self.n_gas_species))
    surface_cov = np.zeros((len(times), self.n_cstr, self.n_surface_species))

    for step, instant in enumerate(times):
        self.advance(instant)
        mol_fractions[step] = self.X
        surface_cov[step] = self.coverages

    if not returnNDDataset:
        return {"X": mol_fractions, "coverages": surface_cov}

    # gas phase: z = time, y = reactor, x = species
    gas = NDDataset(mol_fractions)
    gas.title = "mol fraction"
    gas.z = Coord(times, title="time")
    gas.y.labels = [reactor.name for reactor in self.cstr]
    gas.x.title = "species"
    gas.x.labels = self.cstr[0].kinetics.species_names

    # surface: same layout, with the surface species along x
    surf = NDDataset(surface_cov)
    surf.title = "coverage"
    surf.z = Coord(times, title="time")
    surf.x.title = "species"
    surf.x.labels = self.surface[0].kinetics.species_names
    surf.y.title = "reactor"
    surf.y.labels = [reactor.name for reactor in self.cstr]

    return {"X": gas, "coverages": surf}
def test_read_remote():
    """Read an OMNIC file, then read it again after moving it away.

    The local file is renamed to simulate its absence and the dataset is
    requested again by relative name. The original file is always put back
    afterwards so that the data directory is left unchanged.
    """
    filename = IRDATA / "nh4y-activation.spg"
    expected = "NDDataset: [float64] a.u. (shape: (y:55, x:5549))"

    # read normally
    nd1 = NDDataset.read_omnic(filename)
    assert str(nd1) == expected

    # move the files to simulate their absence:
    filesaved = filename.rename("~irdata_save")

    try:
        # now try to read
        nd2 = NDDataset.read_omnic("irdata/nh4y-activation.spg")
        assert str(nd2) == expected
    finally:
        # restore the original file whatever happened above; replace()
        # overwrites a file that may have been re-created at that location
        filesaved.replace(filename)
def test_nddataset_add_mismatch_coords():
    """In-place arithmetic on datasets with different coordinates must fail."""
    from_zero = Coord(np.arange(5.0))
    from_one = Coord(np.arange(1.0, 5.5, 1.0))

    # same data, coordinates given in opposite order
    left = NDDataset(np.ones((5, 5)), coordset=[from_zero, from_one])
    right = NDDataset(np.ones((5, 5)), coordset=[from_one, from_zero])

    expected_start = "\nCoord.data attributes are not almost equal"

    with pytest.raises(CoordinateMismatchError) as exc:
        left -= right
    assert str(exc.value).startswith(expected_start)

    with pytest.raises(CoordinateMismatchError) as exc:
        left += right
    assert str(exc.value).startswith(expected_start)

    # TODO= make more tests like this for various functions
def test_nddataset_add_with_masks():
    """Adding datasets with complementary masks gives a fully masked result."""
    # numpy masked arrays mask the result of binary operations if the
    # mask of either operand is set.
    # Does NDData?

    # without masks: plain element-wise sum
    total = NDDataset(np.array([1, 2])) + NDDataset(np.array([2, 1]))
    assert_array_equal(total.data, np.array([3, 3]))

    # with masks: the second operand masks exactly what the first does not
    masked_a = NDDataset(np.array([1, 2]), mask=np.array([True, False]))
    complement = ~masked_a.mask
    masked_b = NDDataset(np.array([2, 1]), mask=complement)

    total = masked_a + masked_b

    # The result should have all entries masked...
    assert total.mask.all()
def test_nddataset_add_mismatch_units():
    """Addition with incompatible units raises DimensionalityError."""
    areas = NDDataset(np.ones((5, 5)), units="cm^2")
    lengths = NDDataset(np.ones((5, 5)), units="cm")

    # plain addition
    expected_add = (
        "Cannot convert from '[length]' to '[length]^2', "
        "Units must be compatible for the `add` operator"
    )
    with pytest.raises(DimensionalityError) as exc:
        areas + lengths
    assert str(exc.value).startswith(expected_add)

    # in-place addition
    expected_iadd = (
        "Cannot convert from '[length]' to '[length]^2', "
        "Units must be compatible for the `iadd` operator"
    )
    with pytest.raises(DimensionalityError) as exc:
        areas += lengths
    assert str(exc.value).startswith(expected_iadd)
def test_IRIS():
    """Run IRIS on a spectral window of a pressure series and plot the results."""
    spectra = NDDataset.read_omnic(os.path.join('irdata', 'CO@Mo_Al2O3.SPG'))

    pressures = [
        0.00300, 0.00400, 0.00900, 0.01400, 0.02100, 0.02600, 0.03600,
        0.05100, 0.09300, 0.15000, 0.20300, 0.30000, 0.40400, 0.50300,
        0.60200, 0.70200, 0.80100, 0.90500, 1.00400,
    ]

    # Using the `update` method is mandatory because it will preserve the name.
    # Indeed, setting using X.coordset[0] = Coord(...) fails unless name is
    # specified: Coord(..., name='y')
    pressure_axis = Coord(pressures, title='pressure', units='torr')
    spectra.coordset.update(y=pressure_axis)

    # set the optimization parameters, perform the analysis
    # and plot the results
    settings = dict(
        epsRange=[-8, -1, 20],
        lambdaRange=[-7, -5, 3],
        kernel='langmuir',
    )

    window = spectra[:, 2250.:1950.]
    window.plot()

    analysis = IRIS(window, settings, verbose=True)
    distribution = analysis.f
    rebuilt = analysis.reconstruct()

    analysis.plotlcurve(scale='ln')
    distribution[0].plot(method='map', plottitle=True)
    rebuilt[0].plot(plottitle=True)

    show()
def test_nmr_auto_1D_phasing():
    """Plot a 1D spectrum phased with pk and with each apk algorithm."""
    path = os.path.join(prefs.datadir, 'nmrdata', 'bruker', 'tests', 'nmr',
                        'topspin_1d')
    signal = NDDataset.read_topspin(path, expno=1, remove_digital_filter=True)
    signal /= signal.real.data.max()  # normalize
    signal.em(10. * ur.Hz, inplace=True)

    spectrum = signal.fft(tdeff=8192, size=2**15)
    window = (20, -20)

    spectrum.plot(xlim=window, ls=':', color='k')
    spectrum.pk(verbose=True).plot(xlim=window, clear=False, color='r')

    # automatic phasing:
    # (extra apk arguments, constant subtracted before plotting, line style)
    trials = (
        ({}, 1, {'color': 'b'}),
        ({'algorithm': 'acme'}, 2, {'color': 'g'}),
        ({'algorithm': 'neg_peak'}, 3, {'ls': '-', 'color': 'r'}),
        ({'algorithm': 'neg_area'}, 4, {'ls': '-.', 'color': 'm'}),
    )
    phased = None
    for apk_options, shift, line_style in trials:
        phased = spectrum.apk(verbose=True, **apk_options)
        (phased - shift).plot(xlim=window, clear=False, **line_style)

    # `phased` now holds the 'neg_area' result: phase it again with 'acme'
    rephased = phased.apk(algorithm='acme', verbose=True)
    (rephased - 6).plot(xlim=window, clear=False, color='b')

    show()
def NMR_dataset_2D():
    """Read the 2D TopSpin test data and return a copy named 'NMR_2D'."""
    ser_file = datadir.joinpath(
        'nmrdata', 'bruker', 'tests', 'nmr', 'topspin_2d', '1', 'ser'
    )
    loaded = NDDataset.read_topspin(
        ser_file,
        expno=1,
        remove_digital_filter=True,
        name="NMR_2D",
    )
    # hand out a copy of the dataset that was read
    return loaded.copy()
def test_nddataset_add_units_with_different_scale():
    """Adding metres and centimetres yields values in the left operand's units."""
    metres = NDDataset(np.ones((5, 5)), units="m")
    centimetres = NDDataset(np.ones((5, 5)), units="cm")

    # dataset + quantity
    total = metres + 1.0 * ur.cm
    assert total[0, 0].values == 1.01 * ur.m

    # dataset + dataset, in both orders
    total = metres + centimetres
    assert total.data[0, 0] == 1.01

    total = centimetres + metres
    assert total.data[0, 0] == 101.0

    # in-place additions accumulate on the left operand
    metres += centimetres
    assert metres.data[0, 0] == 1.01

    centimetres += metres
    assert centimetres.data[0, 0] == 102.0
def test_nmr_auto_1D_phasing():
    """Overlay a 1D spectrum phased with pk and with the apk algorithms."""
    source = os.path.join(
        prefs.datadir, "nmrdata", "bruker", "tests", "nmr", "topspin_1d"
    )
    data = NDDataset.read_topspin(source, expno=1, remove_digital_filter=True)
    data /= data.real.data.max()  # normalize
    data.em(10.0 * ur.Hz, inplace=True)

    spectrum = data.fft(tdeff=8192, size=2**15)
    spectrum.plot(xlim=(20, -20), ls=":", color="k")

    with_pk = spectrum.pk(verbose=True)
    with_pk.plot(xlim=(20, -20), clear=False, color="r")

    # automatic phasing; a different constant is subtracted from each
    # result before it is added to the plot
    with_default = spectrum.apk(verbose=True)
    (with_default - 1).plot(xlim=(20, -20), clear=False, color="b")

    with_acme = spectrum.apk(algorithm="acme", verbose=True)
    (with_acme - 2).plot(xlim=(20, -20), clear=False, color="g")

    with_neg_peak = spectrum.apk(algorithm="neg_peak", verbose=True)
    (with_neg_peak - 3).plot(xlim=(20, -20), clear=False, ls="-", color="r")

    with_neg_area = spectrum.apk(algorithm="neg_area", verbose=True)
    (with_neg_area - 4).plot(xlim=(20, -20), clear=False, ls="-.", color="m")

    # 'acme' applied on top of the 'neg_area' result
    chained = with_neg_area.apk(algorithm="acme", verbose=True)
    (chained - 6).plot(xlim=(20, -20), clear=False, color="b")

    show()
def test_simple_arithmetic_on_full_dataset():
    """Subtract the first spectrum of a series from the whole series."""
    # due to a bug in notebook with the following
    import os

    spg_file = os.path.join('irdata', 'nh4y-activation.spg')
    series = NDDataset.read_omnic(spg_file)

    # subtract the first spectrum from all spectra in the series;
    # only checks that the operation runs, the result is discarded
    series - series[0]
def __getitem__(self, key):
    """
    Return the content of the archive member called `key`.

    The file extension of the member selects how it is decoded:

    * ``.npy``  : read with ``read_array`` (pickled objects allowed),
    * ``.scp``  : loaded with ``NDDataset.load``,
    * ``.json`` : parsed with ``json.loads`` using ``json_decoder`` as
      object hook,
    * anything else : returned as raw bytes.

    Parameters
    ----------
    key : str
        Name of the member, as listed in ``self.files``.

    Raises
    ------
    KeyError
        If `key` is not listed in ``self.files``.
    """
    if key not in self.files:
        # an f-string keeps this a KeyError whatever the type of `key`
        # (%-formatting raised TypeError for tuple keys)
        raise KeyError(
            f"{key} is not a file in the archive or is not allowed")

    _, ext = os.path.splitext(key)

    if ext == ".npy":
        # NOTE(security): allow_pickle=True lets the archive carry pickled
        # objects, which can run arbitrary code when loaded; only open
        # archives that come from a trusted source.
        # The member handle is closed as soon as the array has been read.
        with self.zip.open(key) as member:
            return read_array(member, allow_pickle=True)

    if ext == ".scp":
        # imported here rather than at module level -- presumably to avoid
        # a circular import; confirm before moving it
        from spectrochempy.core.dataset.nddataset import NDDataset

        content = self.zip.read(key)
        return NDDataset.load(key, content=content)

    if ext == ".json":
        content = self.zip.read(key)
        return json.loads(content, object_hook=json_decoder)

    # any other member is handed back untouched
    return self.zip.read(key)