def test_read_matlab():

    A = NDDataset.read_matlab(MATLABDATA / "als2004dataset.MAT")
    assert len(A) == 6
    assert A[3].shape == (4, 96)

    A = NDDataset.read_matlab(MATLABDATA / "dso.mat")
    assert A.name == "Group sust_base line withoutEQU.SPG"
    assert A.shape == (20, 426)
Example #2
def test_read_opus():
    # single file
    A = NDDataset.read_opus(OPUSDATA / "test.0000")
    assert A.shape == (1, 2567)
    assert A[0, 2303.8694].data == pytest.approx(2.72740, 0.00001)

    # read contents
    p = OPUSDATA / "test.0000"
    content = p.read_bytes()
    F = NDDataset.read_opus({p.name: content})
    assert F.name == p.name
    assert F.shape == (1, 2567)

    assert NDDataset.read_opus(OPUSDATA / "background.0") is None
Example #3
def test_write():
    nd = scp.read_omnic('irdata/nh4y-activation.spg')

    # API write methods need an instance of an NDDataset as the first argument
    with pytest.raises(TypeError):
        scp.write()

    # the simplest way to save a dataset is to use the write function with a filename as argument
    filename = nd.write('essai.scp')
    assert filename == cwd / 'essai.scp'

    nd2 = NDDataset.load(filename)
    testing.assert_dataset_equal(nd2, nd)
    filename.unlink()

    # if the filename is omitted, a dialog is opened to select a name (and a protocol)
    filename = nd.write()
    assert filename is not None
    assert filename.stem == nd.name
    assert filename.suffix == '.scp'
    filename.unlink()

    # # a write protocol can be specified
    # filename = nd.write(protocol='json')
    # assert filename is not None
    # assert filename.stem == nd.name
    # assert filename.suffix == '.json'
    # filename.unlink()

    irdatadir = pathclean(prefs.datadir) / "irdata"
    for f in ['essai.scp', 'nh4y-activation.scp']:
        if (irdatadir / f).is_file():
            (irdatadir / f).unlink()
Example #4
def test_notebook_basecor_bug():
    dataset = NDDataset.read_omnic(
        os.path.join('irdata', 'nh4y-activation.spg'))

    s = dataset[:, 1260.0:5999.0]
    s = s - s[-1]

    # Important: note that we use floating point numbers;
    # integers would mean points, not wavenumbers!

    basc = BaselineCorrection(s)

    ranges = [[1261.86, 1285.89], [1556.30, 1568.26], [1795.00, 1956.75],
              [3766.03, 3915.81], [4574.26, 4616.04], [4980.10, 4998.01],
              [5437.52, 5994.70]]  # predefined ranges

    _ = basc.run(*ranges,
                 method='multivariate',
                 interpolation='pchip',
                 npc=5,
                 figsize=(6, 6),
                 zoompreview=4)

    # The regions used to set the baseline are accessible using the `ranges`
    # attribute:
    ranges = basc.ranges
    print(ranges)

    basc.corrected.plot_stack()
Example #5
def test_issue_375():

    # minimal example
    n_pc = 3

    color1, color2 = "b", "r"

    ratio = NDDataset([1, 2, 3])
    cum = ratio.cumsum()

    ax1 = ratio.plot_bar(color=color1, title="Scree plot")
    assert len(ax1.lines) == 0, "no lines"
    assert len(ax1.patches) == 3, "bar present"
    ax2 = cum.plot_scatter(color=color2, pen=True, markersize=7.0, twinx=ax1)
    assert len(ax2.lines) == 1, "1 line"
    assert len(ax2.patches) == 0, "no bar present on the second plot"
    # TODO: Don't know yet how to get the marker present.
    ax1.set_title("Scree plot")
    show()
def make_samples(force_original=False):
    _samples = {
        'P350': {
            'label': r'$\mathrm{M_P}\,(623\,K)$'
        },
        # 'A350': {'label': r'$\mathrm{M_A}\,(623\,K)$'},
        # 'B350': {'label': r'$\mathrm{M_B}\,(623\,K)$'}
    }

    for key, sample in _samples.items():
        # our data are in our test `datadir` directory.
        basename = os.path.join(prefs.datadir, f'agirdata/{key}/FTIR/FTIR')
        if os.path.exists(basename + '.scp') and not force_original:
            # check if the scp file has already been saved
            filename = basename + '.scp'
            sample['IR'] = NDDataset.read(filename)
        else:
            # else read the original zip file
            filename = basename + '.zip'
            sample['IR'] = NDDataset.read_zip(filename,
                                              only=5,
                                              origin='omnic',
                                              merge=True)
            # save
            sample['IR'].save()

    for key, sample in _samples.items():
        basename = os.path.join(prefs.datadir, f'agirdata/{key}/TGA/tg')
        if os.path.exists(basename + '.scp') and not force_original:
            # check if the scp file has already been saved
            filename = basename + '.scp'
            sample['TGA'] = NDDataset.read(filename)
        else:
            # else read the original csv file
            filename = basename + '.csv'
            ss = sample['TGA'] = NDDataset.read_csv(filename, origin='tga')
            ss.squeeze(inplace=True)
            # let's keep only data in a range close to 0.
            s = sample['TGA'] = ss[-0.5:35.0]
            # save
            s.save()

    return _samples
Example #7
def test_generic_read():
    # filename + extension specified
    ds = scp.read('wodger.spg')
    assert ds.name == 'wodger'

    # save with no filename (should save wodger.scp)
    path = ds.save()

    assert isinstance(path, Path)
    assert path.stem == ds.name
    assert path.parent == ds.directory

    # read should be equivalent to load (but read is a more general function)
    dataset = NDDataset.load('wodger.scp')
    assert dataset.name == 'wodger'
Example #8
def test_plot2D_as_3D():
    data = NDDataset.read_matlab(
        os.path.join('matlabdata', 'als2004dataset.MAT'))

    X = data[0]
    # X.plot_3D()

    X.plot_surface()

    X.set_coordset(y=Coord(title='elution time'), x=Coord(title='wavenumbers'))
    X.title = 'intensity'
    X.plot_surface()

    X.plot_surface(colorbar=True)

    show()
Example #9
def test_plot2D():
    A = NDDataset.read_omnic("irdata/nh4y-activation.spg")
    A.y -= A.y[0]
    A.y.to("hour", inplace=True)
    A.y.title = "Acquisition time"
    A.copy().plot_stack()
    A.copy().plot_stack(data_transposed=True)
    A.copy().plot_image(style=["sans", "paper"], fontsize=9)

    # use preferences
    prefs = A.preferences
    prefs.reset()
    prefs.image.cmap = "magma"
    prefs.font.size = 10
    prefs.font.weight = "bold"
    prefs.axes.grid = True
    A.plot()
    A.plot(style=["sans", "paper", "grayscale"], colorbar=False)

    show()
Example #10
def test_plot2D_as_3D():
    data = NDDataset.read_matlab(
        os.path.join("matlabdata", "als2004dataset.MAT"))

    X = data[0]

    X.plot_surface()

    X.set_coordset(
        y=Coord(title="elution time", units="s"),
        x=Coord(title="wavenumbers", units="cm^-1"),
    )
    X.title = "intensity"
    X.plot_surface()

    X.plot_surface(colorbar=True)

    show()
Example #11
def test_plot2D():
    A = NDDataset.read_omnic('irdata/nh4y-activation.spg')
    A.y -= A.y[0]
    A.y.to('hour', inplace=True)
    A.y.title = 'Acquisition time'
    A.copy().plot_stack()
    A.copy().plot_stack(data_transposed=True)
    A.copy().plot_image(style=['sans', 'paper'], fontsize=9)

    # use preferences
    prefs = A.preferences
    prefs.reset()
    prefs.image.cmap = 'magma'
    prefs.font.size = 10
    prefs.font.weight = 'bold'
    prefs.axes.grid = True
    A.plot()
    A.plot(style=['sans', 'paper', 'grayscale'], colorbar=False)
    show()
Example #12
def test_write():
    nd = scp.read_omnic("irdata/nh4y-activation.spg")

    # API write methods need an instance of an NDDataset as the first argument
    with pytest.raises(TypeError):
        scp.write()

    # the simplest way to save a dataset is to use the write function with a filename as argument
    if (cwd / "essai.scp").exists():
        (cwd / "essai.scp").unlink()

    filename = nd.write("essai.scp")  # should not open a DIALOG
    assert filename == cwd / "essai.scp"
    assert filename.exists()

    # try to write it again
    filename = nd.write("essai.scp")  # should open a DIALOG to confirm

    nd2 = NDDataset.load(filename)
    testing.assert_dataset_equal(nd2, nd)
    filename.unlink()

    # if the filename is omitted, a dialog is opened to select a name (and a protocol)
    filename = nd.write()
    assert filename is not None
    assert filename.stem == nd.name
    assert filename.suffix == ".scp"
    filename.unlink()

    # # a write protocol can be specified
    # filename = nd.write(protocol='json')
    # assert filename is not None
    # assert filename.stem == nd.name
    # assert filename.suffix == '.json'
    # filename.unlink()

    irdatadir = pathclean(prefs.datadir) / "irdata"
    for f in ["essai.scp", "nh4y-activation.scp"]:
        if (irdatadir / f).is_file():
            (irdatadir / f).unlink()
Example #13
def test_plot_1D():

    dataset = NDDataset.read_omnic(
        os.path.join("irdata", "nh4y-activation.spg"))

    # get first 1D spectrum
    nd0 = dataset[0, 1550.0:1600.0]

    # plot generic 1D
    nd0.plot()
    nd0.plot_scatter(plottitle=True)
    nd0.plot_scatter(marker="^", markevery=10, title="scatter+marker")
    prefs = nd0.preferences
    prefs.method_1D = "scatter+pen"

    nd0.plot(title="xxxx")
    prefs.method_1D = "pen"
    nd0.plot(marker="o", markevery=10, title="with marker")

    # plot 1D column
    col = dataset[:, 3500.0]  # note the indexing using wavenumber!
    _ = col.plot_scatter()

    _ = col.plot_scatter(uselabel=True)
def test_read_carroucell(monkeypatch):

    # Before testing, we need to download the data locally if not yet done,
    # because read_carroucell is not designed to download it itself.
    # Use read_remote for that:
    NDDataset.read_remote("irdata/carroucell_samp", replace_existing=False)

    nd = NDDataset.read_carroucell("irdata/carroucell_samp", spectra=(1, 2))
    for x in nd:
        info_("  " + x.name + ": " + str(x.shape))
    assert len(nd) == 11
    assert nd[3].shape == (2, 11098)

    nd = NDDataset.read_carroucell("irdata/carroucell_samp", spectra=(1, 1))
    assert isinstance(nd, NDDataset)

    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_carroucell)
    monkeypatch.setenv("KEEP_DIALOGS", "True")
    nd = NDDataset.read_carroucell(spectra=(1, 3))
    assert nd[3].shape == (3, 11098)

    nd = NDDataset.read_carroucell(spectra=(2, 3), discardbg=False)
    assert nd[3].shape == (2, 11098)
Example #15
def test_plotly2D():
    A = NDDataset.read_omnic('irdata/nh4y-activation.spg',
                             directory=prefs.datadir)
    A.y -= A.y[0]
    A.y.to('hour', inplace=True)
    A.y.title = 'Acquisition time'
Example #16
def test_read_quadera():
    # single file
    A = NDDataset.read_quadera('msdata/ion_currents.asc')
    assert str(A) == 'NDDataset: [float64] A (shape: (y:16975, x:10))'
Example #17
def fake_dataset(*args, size=3, **kwargs):
    if not args:
        ds = NDDataset([range(size)])
    else:
        ds = NDDataset([[range(4)]])
    return ds
Example #18
def test_1D():
    dataset = NDDataset.read_omnic(
        os.path.join(prefs.datadir, 'irdata', 'nh4y-activation.spg'))

    # get first spectrum
    nd0 = dataset[0]

    # plot generic
    nd0.plot()

    # nd0.plot(output=os.path.join(figures_dir, 'IR_dataset_1D'),
    #          savedpi=150)
    #
    # # plot generic style
    # nd0.plot(style='poster',
    #          output=os.path.join(figures_dir, 'IR_dataset_1D_poster'),
    #          savedpi=150)
    #
    # # check that style reinit to default
    # nd0.plot(output='IR_dataset_1D', savedpi=150)
    # # try:
    # #     assert same_images('IR_dataset_1D.png',
    # #                        os.path.join(figures_dir, 'IR_dataset_1D.png'))
    # # except AssertionError:
    # #     os.remove('IR_dataset_1D.png')
    # #     raise AssertionError('comparison fails')
    # # os.remove('IR_dataset_1D.png')
    #
    # # try other type of plots
    # nd0.plot_pen()
    # nd0[:, ::100].plot_scatter()
    # nd0.plot_lines()
    # nd0[:, ::100].plot_bar()
    #
    # show()
    #
    # # multiple
    # d = dataset[:, ::100]
    # datasets = [d[0], d[10], d[20], d[50], d[53]]
    # labels = ['sample {}'.format(label) for label in
    #           ["S1", "S10", "S20", "S50", "S53"]]
    #
    # # plot multiple
    # plot_multiple(method='scatter',
    #               datasets=datasets, labels=labels, legend='best',
    #               output=os.path.join(figures_dir,
    #                                   'multiple_IR_dataset_1D_scatter'),
    #               savedpi=150)
    #
    # # plot multiple with style
    # plot_multiple(method='scatter', style='sans',
    #               datasets=datasets, labels=labels, legend='best',
    #               output=os.path.join(figures_dir,
    #                                   'multiple_IR_dataset_1D_scatter_sans'),
    #               savedpi=150)
    #
    # # check that style reinit to default
    # plot_multiple(method='scatter',
    #               datasets=datasets, labels=labels, legend='best',
    #               output='multiple_IR_dataset_1D_scatter',
    #               savedpi=150)
    # try:
    #     assert same_images('multiple_IR_dataset_1D_scatter',
    #                        os.path.join(figures_dir,
    #                                     'multiple_IR_dataset_1D_scatter'))
    # except AssertionError:
    #     os.remove('multiple_IR_dataset_1D_scatter.png')
    #     raise AssertionError('comparison fails')
    # os.remove('multiple_IR_dataset_1D_scatter.png')

    # plot 1D column
    col = dataset[:, 3500.]  # note the indexing using wavenumber!
    _ = col.plot_scatter()

    show()
def test_read_quadera():
    # single file
    A = NDDataset.read_quadera(MSDATA / "ion_currents.asc")
    assert str(A) == "NDDataset: [float64] A (shape: (y:16975, x:10))"
Example #20
def test_read_opus():
    # single file
    A = NDDataset.read_opus(os.path.join('irdata', 'OPUS', 'test.0000'))
    assert A.shape == (1, 2567)
    assert A[0, 2303.8694].data == pytest.approx(2.72740, 0.00001)

    # using a Windows path
    A1 = NDDataset.read_opus('irdata\\OPUS\\test.0000')
    assert A1.shape == (1, 2567)

    # single file specified with pathlib
    datadir = Path(prefs.datadir)
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    A2 = NDDataset.read_opus(p)
    assert A2.shape == (1, 2567)

    # multiple files not merged
    B = NDDataset.read_opus('test.0000', 'test.0001', 'test.0002', directory=os.path.join('irdata', 'OPUS'))
    assert isinstance(B, NDDataset)
    assert len(B) == 3

    # multiple files merged as the merge keyword is set to True
    C = scp.read_opus('test.0000', 'test.0001', 'test.0002', directory=os.path.join('irdata', 'OPUS'), merge=True)
    assert C.shape == (3, 2567)

    # multiple files to merge: they are passed as a list
    D = NDDataset.read_opus(['test.0000', 'test.0001', 'test.0002'], directory=os.path.join('irdata', 'OPUS'))
    assert D.shape == (3, 2567)

    # multiple files not merged: they are passed as a list but merge is set to False
    E = scp.read_opus(['test.0000', 'test.0001', 'test.0002'], directory=os.path.join('irdata', 'OPUS'), merge=False)
    assert isinstance(E, list)
    assert len(E) == 3

    # read contents
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    content = p.read_bytes()
    F = NDDataset.read_opus({p.name: content})
    assert F.name == p.name
    assert F.shape == (1, 2567)

    # read multiple contents
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    G = NDDataset.read_opus({p.name: p.read_bytes() for p in lst})
    assert len(G) == 3

    # read multiple contents and merge them
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    H = NDDataset.read_opus({p.name: p.read_bytes() for p in lst}, merge=True)
    assert H.shape == (3, 2567)

    # read without filename -> open a dialog
    K = NDDataset.read_opus()

    # read in a directory (assumes a homogeneous type of data; otherwise use the generic read function instead)
    K = NDDataset.read_opus(datadir / 'irdata' / 'OPUS')
    assert K.shape == (4, 2567)

    # again, we can use merge=False to avoid stacking all 4 spectra
    J = NDDataset.read_opus(datadir / 'irdata' / 'OPUS', merge=False)
    assert isinstance(J, list)
    assert len(J) == 4

    # single opus file using the generic read function:
    # if the protocol is given, it is equivalent to the read_opus function
    F = NDDataset.read(os.path.join('irdata', 'OPUS', 'test.0000'), protocol='opus')
    assert F.shape == (1, 2567)

    # no protocol specified: it is inferred from the file extension
    G = NDDataset.read(os.path.join('irdata', 'OPUS', 'test.0000'))
    assert G.shape == (1, 2567)
Example #21
def test_read():
    f = Path('irdata/OPUS/test.0000')

    A1 = NDDataset.read_opus(f)
    assert A1.shape == (1, 2567)

    # single file read with protocol specified
    A2 = NDDataset.read(f, protocol='opus')
    assert A2 == A1

    A3 = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    assert str(A3) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    # single file without protocol
    # inferred from filename
    A4 = NDDataset.read(f)
    assert A4 == A1

    A5 = scp.read('irdata/nh4y-activation.spg')
    assert str(A5) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    # native format
    f = A5.save_as('nh4y.scp')
    A6 = scp.read('irdata/nh4y.scp')
    assert str(A6) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    A7 = scp.read('nh4y', directory='irdata', protocol='scp')
    assert str(A7) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    A8 = scp.read('nh4y', directory='irdata')
    assert str(A8) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    f.unlink()

    # multiple compatible 1D files automatically merged
    B = NDDataset.read('test.0000',
                       'test.0001',
                       'test.0002',
                       directory=os.path.join('irdata', 'OPUS'))
    assert str(B) == 'NDDataset: [float64] a.u. (shape: (y:3, x:2567))'
    assert len(B) == 3

    # multiple compatible 1D files not merged if the merge keyword is set to False
    C = scp.read('test.0000',
                 'test.0001',
                 'test.0002',
                 directory=os.path.join('irdata', 'OPUS'),
                 merge=False)
    assert isinstance(C, list)

    # multiple 1D files to merge
    D = NDDataset.read(['test.0000', 'test.0001', 'test.0002'],
                       directory=os.path.join('irdata', 'OPUS'))
    assert D.shape == (3, 2567)

    # multiple 1D files not merged: they are passed as a list but merge is set to False
    E = scp.read(['test.0000', 'test.0001', 'test.0002'],
                 directory=os.path.join('irdata', 'OPUS'),
                 merge=False)
    assert isinstance(E, list)
    assert len(E) == 3

    # read contents
    datadir = Path(prefs.datadir)
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    content = p.read_bytes()
    F = NDDataset.read({p.name: content})
    assert F.name == p.name
    assert F.shape == (1, 2567)

    # read multiple 1D contents and merge them
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    G = NDDataset.read({p.name: p.read_bytes() for p in lst})
    assert G.shape == (3, 2567)
    assert len(G) == 3

    # read multiple 1D contents without merging
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    H = NDDataset.read({p.name: p.read_bytes() for p in lst}, merge=False)
    assert isinstance(H, list)
    assert len(H) == 3

    filename = datadir / 'wodger.spg'
    content = filename.read_bytes()

    # change the filename to be sure that the file will be read from the passed content
    filename = 'try.spg'

    # The most direct way to pass the byte content information
    nd = NDDataset.read(filename, content=content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # It can also be passed using a dictionary structure {filename:content, ....}
    nd = NDDataset.read({filename: content})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # Case where the filename is not provided
    nd = NDDataset.read(content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # Try with an .spa file
    filename = datadir / 'irdata/subdir/7_CZ0-100 Pd_101.SPA'
    content = filename.read_bytes()
    filename = 'try.spa'

    filename2 = datadir / 'irdata/subdir/7_CZ0-100 Pd_102.SPA'
    content2 = filename2.read_bytes()
    filename2 = 'try2.spa'

    nd = NDDataset.read({filename: content})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:1, x:5549))'

    # Try with only a .spa content
    nd = NDDataset.read(content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:1, x:5549))'

    # Try with several .spa contents (should be stacked into a single NDDataset)
    nd = NDDataset.read({filename: content, filename2: content2})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    nd = NDDataset.read(content, content2)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'
Example #22
def test_plotly2D():
    A = NDDataset.read_omnic("irdata/nh4y-activation.spg",
                             directory=prefs.datadir)
    A.y -= A.y[0]
    A.y.to("hour", inplace=True)
    A.y.title = "Acquisition time"
Example #23
# ----
# Let's change this name

# %%
proj.name = 'myNMRdata'
proj

# %% [markdown]
# Now we will add a dataset to the project.
#
# First we read the datasets (here some NMR data) and give them names (e.g. 'NMR_1D')

# %%
datadir = pathclean(prefs.datadir)
path = datadir / 'nmrdata' / 'bruker' / 'tests' / 'nmr'
nd1 = NDDataset.read_topspin(path / 'topspin_1d', expno=1, remove_digital_filter=True, name="NMR_1D")
nd2 = NDDataset.read_topspin(path / 'topspin_2d', expno=1, remove_digital_filter=True, name='NMR_2D')

# %% [markdown]
# To add it to the project, we use the `add_dataset` function for a single dataset:

# %%
proj.add_dataset(nd1)

# %% [markdown]
# or `add_datasets` for several datasets.

# %%
proj.add_datasets(nd1, nd2)

# %% [markdown]
# In the first syntax we load the library into a namespace called `scp`
# (we recommend this name, but you can choose whatever
# you want - except something already in use):

# %%
import spectrochempy as scp  # SYNTAX 1

nd = scp.NDDataset()

# %% [markdown]
# or in the second syntax, with a wildcard `*` import.

# %%
from spectrochempy import *  # SYNTAX 2

nd = NDDataset()

# %% [markdown]
# With the second syntax, as often in Python, access to objects/functions can be
# greatly simplified. For example, we can use `NDDataset` directly, without a prefix,
# instead of `scp.NDDataset` as in the first syntax. But there is always a risk of
# overwriting some variables or functions already present in the namespace.
# Therefore, the first syntax is generally highly recommended.
#
# Alternatively, you can also load only the objects and functions required by your application:
#

# %%
from spectrochempy import NDDataset  # SYNTAX 3

nd = NDDataset()
Example #25
# *  `mask`: Data can be partially masked at will
# *  `units`: Data can have units, allowing units-aware operations
# *  `coordset`: Data can have a set of coordinates, one or several per dimension
#
# Additional metadata can also be added to the instances of this class through the `meta` properties.

# %% [markdown]
# ## 1D-Dataset (unidimensional dataset)

# %% [markdown]
# In the following example, a minimal 1D dataset is created from a simple list, to which we can add some metadata:

# %%
d1D = NDDataset(
    [10.0, 20.0, 30.0],
    name="Dataset N1",
    author="Blake and Mortimer",
    description="A dataset from scratch",
)
d1D
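
# %% [markdown]
# As a quick illustration of the attributes listed above (a minimal sketch: the unit
# string and metadata key below are illustrative assumptions, not part of the original
# example):

# %%
d1D.units = "eV"  # attach units; operations on d1D are now units-aware
d1D.meta.experimenter = "Blake"  # arbitrary metadata through the `meta` property
d1D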

# %% [markdown]
# <div class='alert alert-info'>
#     <b>Note</b>
#
#  In the above code, run in a notebook, the output of d1D is rendered in HTML for a nice display.
#
#  To get the same effect from a console script, one can use `print_` (with an underscore) and not the usual Python
#  function `print`. As you can see below, the `print` function only gives a short summary of the information,
#  while the `print_` method gives a more detailed output.
#
# </div>
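
# %% [markdown]
# A minimal sketch of that comparison, assuming `print_` has been imported from the
# spectrochempy namespace:

# %%
print(d1D)  # short summary only
print_(d1D)  # more detailed output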
Example #26
def test_importer(monkeypatch, fs):

    fs.create_file("/var/data/xx1.txt")
    assert os.path.exists("/var/data/xx1.txt")

    # mock filesystem
    fs.create_dir(DATADIR)

    # try to read a nonexistent scp file
    f = DATADIR / "fakedir/fakescp.scp"
    with pytest.raises(FileNotFoundError):
        read(f)

    # make fake file
    fs.create_file(f)
    monkeypatch.setattr(NDDataset, "load", fake_dataset)

    nd = read(f)
    assert nd == fake_dataset(f)

    nd = read(f.stem, directory=DATADIR / "fakedir/", protocol="scp")
    assert nd == fake_dataset(f)

    nd = read(f.stem, directory=DATADIR / "fakedir/")
    assert nd == fake_dataset(f)

    # Generic read without parameters, cancelling the dialog
    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_cancel)
    monkeypatch.setenv("KEEP_DIALOGS", "True")  # we ask to display dialogs, as we will mock them

    nd = read()
    assert nd is None

    # read as class method
    nd1 = NDDataset.read()
    assert nd1 is None

    # NDDataset instance as first argument
    nd = NDDataset()
    nd2 = nd.read()
    assert nd2 is None

    nd = read(default_filter="matlab")
    assert nd is None

    # Check behavior when the file type is not known
    f = DATADIR / "fakedir/not_exist_fake.fk"
    with pytest.raises(TypeError):
        read_fake(f)

    # Make fake type acceptable
    FILETYPES.append(("fake", "FAKE files (*.fk)"))
    ALIAS.append(("fk", "fake"))
    monkeypatch.setattr("spectrochempy.core.readers.importer.FILETYPES",
                        FILETYPES)
    monkeypatch.setattr("spectrochempy.core.readers.importer.ALIAS", ALIAS)

    # Check not existing filename
    f = DATADIR / "fakedir/not_exist_fake.fk"
    with pytest.raises(FileNotFoundError):
        read_fake(f)

    # Generic read with a wrong protocol
    with pytest.raises(spectrochempy.utils.exceptions.ProtocolError):
        read(f, protocol="wrongfake")

    # Generic read with a wrong file extension
    with pytest.raises(TypeError):
        g = DATADIR / "fakedir/otherfake.farfelu"
        read(g)

    # Mock file
    f = DATADIR / "fakedir/fake.fk"
    fs.create_file(f)

    # specific read_(protocol) function
    nd = read_fk(f)
    assert nd == fake_dataset()

    # should also be a Class function
    nd = NDDataset.read_fk(f)
    assert nd == fake_dataset()

    # and a NDDataset instance function
    nd = NDDataset().read_fk(f)
    assert nd == fake_dataset()

    # single file without protocol inferred from filename
    nd = read(f)
    assert nd == fake_dataset()

    # single file read with protocol specified
    nd = read(f, protocol="fake")
    assert nd == fake_dataset()

    # attribute a new name
    nd = read(f, name="toto")
    assert nd.name == "toto"

    # mock some fake files and assume they exist
    f1 = DATADIR / "fakedir/fake1.fk"
    f2 = DATADIR / "fakedir/fake2.fk"
    f3 = DATADIR / "fakedir/fake3.fk"
    f4 = DATADIR / "fakedir/fake4.fk"
    f5 = DATADIR / "fakedir/otherdir/otherfake.fk"
    f6 = DATADIR / "fakedir/emptyfake.fk"  # returns None when read
    fs.create_file(f1)
    fs.create_file(f2)
    fs.create_file(f3)
    fs.create_file(f4)
    fs.create_file(f5)
    fs.create_file(f6)

    # multiple compatible 1D files automatically merged
    nd = read(f1, f2, f3)
    assert nd.shape == (3, 3)

    nd = read([f1, f2, f3], name="fake_merged")
    assert nd.shape == (3, 3)
    assert nd.name == "fake_merged"

    # multiple compatible 1D files not merged if the merge keyword is set to False
    nd = read([f1, f2, f3], names=["a", "c", "b"], merge=False)
    assert isinstance(nd, list)
    assert len(nd) == 3 and nd[0] == fake_dataset()
    assert nd[1].name == "c"

    # do not merge inhomogeneous dataset
    nd = read([f1, f2, f5])
    assert isinstance(nd, list)

    # too short a list of names: not applied
    nd = read([f1, f2, f3], names=["a", "c"], merge=False)
    assert nd[0].name.startswith("NDDataset")

    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_open)
    nd = read()  # should open a dialog to select individual filenames (here only simulated)
    assert nd.shape == (2, 3)

    # read in a directory
    monkeypatch.setattr(pathlib.Path, "glob", directory_glob)

    # directory selection
    nd = read(protocol="fake", directory=DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    nd = read(protocol="fake", directory=DATADIR / "fakedir", merge=False)
    assert len(nd) == 4
    assert isinstance(nd, list)

    nd = read(listdir=True, directory=DATADIR / "fakedir")
    assert len(nd) == 4
    assert not isinstance(nd, list)

    # if a directory is passed as a keyword, the behavior is different:
    # a dialog for file selection occurs except if listdir is set to True
    nd = read(directory=DATADIR / "fakedir", listdir=False)
    assert nd.shape == (2, 3)  # -> file selection dialog

    nd = read(directory=DATADIR / "fakedir", listdir=True)
    assert nd.shape == (4, 3)  # -> directory selection dialog

    # read_dir()

    nd = read_dir(DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    nd1 = read_dir()
    assert nd1 == nd

    # open a dialog to possibly select a subdirectory of the specified one
    nd = read_dir(directory=DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    fs.create_file(DATADIR / "fakedir/subdir/fakesub1.fk")
    nd = read_dir(directory=DATADIR / "fakedir", recursive=True)
    assert nd.shape == (5, 3)

    # no merging
    nd = read_dir(directory=DATADIR / "fakedir", recursive=True, merge=False)
    assert len(nd) == 5
    assert isinstance(nd, list)

    # Simulate reading a content
    nd = read({"somename.fk": "a fake content"})
    assert nd == fake_dataset(content=True)
    nd = read_fake({"somename.fk": "a fake content"})
    assert nd == fake_dataset(content=True)

    # read multiple contents and merge them
    nd = read({
        "somename.fk": "a fake content",
        "anothername.fk": "another fake content",
        "stillanothername.fk": "still another fake content",
    })
    assert nd.shape == (3, 3)

    # do not merge
    nd = read(
        {
            "somename.fk": "a fake content",
            "anothername.fk": "another fake content"
        },
        merge=False,
    )
    assert isinstance(nd, list)
    assert len(nd) == 2
Example #27
# To load the API, you must import it using one of the following syntax.
#
# In the first syntax we load the library into a namespace called `scp` (you can choose whatever you want - except
# something already in use):

# %%
import spectrochempy as scp  # SYNTAX 1
nd = scp.NDDataset()


# %% [markdown]
# or in the second syntax, with a wildcard `*` import.

# %%
from spectrochempy import *
nd = NDDataset()

# %% [markdown]
# With the second syntax, as often in Python, access to objects/functions can be greatly simplified. For example,
# we can use `NDDataset` without a prefix instead of `scp.NDDataset` (the first syntax), but there is always a
# risk of overwriting some variables or functions already present in the namespace.
# Therefore, the first syntax is generally highly recommended.
#
# Alternatively, you can also load only the objects and functions required by your application:
#

# %%
from spectrochempy import NDDataset

nd = NDDataset()
Example #28
# handling spectroscopic information, one of the major objectives of the SpectroChemPy package:
#
# *  `mask`: Data can be partially masked at will
# *  `units`: Data can have units, allowing units-aware operations
# *  `coordset`: Data can have a set of coordinates, one or several per dimension
#
# Additional metadata can also be added to the instances of this class through the `meta` properties.

# %% [markdown]
# ## 1D-Dataset (unidimensional dataset)

# %% [markdown]
# In the following example, a minimal 1D dataset is created from a simple list, to which we can add some metadata:

# %%
d1D = NDDataset([10., 20., 30.], name="Dataset N1", author='Blake and Mortimer', description='A dataset from scratch')
d1D

# %% [markdown]
# <div class='alert alert-info'>
#     <b>Note</b>
#
#  In the above code, run in a notebook, the output of d1D is rendered in HTML for a nice display.
#
#  To get the same effect from a console script, one can use `print_` (with an underscore) and not the usual Python
#  function `print`. As you can see below, the `print` function only gives a short summary of the information,
#  while the `print_` method gives a more detailed output.
#
# </div>

# %%
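# A minimal sketch of the comparison described in the note above, assuming `print_`
# has been imported from the spectrochempy namespace:
print(d1D)  # short summary only
print_(d1D)  # more detailed output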