class TestSSUResult:
    """Unit tests for ``SSUResult``: construction, container protocol, validation,
    API parity with ``ArtificialSample``, history tracking, and loss evaluation."""

    # Shared fixture: one synthetic dataset reused by every test in this class.
    dataset = random_dataset(**SIMPLE_PRESET, n_samples=100)

    def test_ctor(self):
        """The constructor should wrap the sample and treat the parameter
        series as the iteration history (one iteration per parameter set)."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        assert result.name == as0.name
        assert result.sample is as0
        # The whole dataset's parameter array is passed, so the number of
        # recorded iterations equals the number of parameter sets.
        assert result.n_iterations == self.dataset.n_samples

    def test_get_item(self):
        """Integer indexing yields a single component object."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        sample = result[0]
        assert isinstance(sample, SSUResultComponent)

    def test_iter(self):
        """Forward iteration yields components."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        for component in result:
            assert isinstance(component, SSUResultComponent)

    def test_reverse_iter(self):
        """``reversed()`` is supported and also yields components."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        for component in reversed(result):
            assert isinstance(component, SSUResultComponent)

    def test_slice(self):
        """Slicing yields an iterable of components."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        for component in result[1:]:
            assert isinstance(component, SSUResultComponent)

    def test_none_error(self):
        """Each constructor argument is mandatory; ``None`` must be rejected."""
        parameters = self.dataset.parameters.copy()
        with pytest.raises(AssertionError):
            SSUResult(None, self.dataset.distribution_type, parameters, 1.0)
        with pytest.raises(AssertionError):
            SSUResult(self.dataset[0], None, parameters, 1.0)
        with pytest.raises(AssertionError):
            SSUResult(self.dataset[0], self.dataset.distribution_type, None, 1.0)
        with pytest.raises(AssertionError):
            SSUResult(self.dataset[0], self.dataset.distribution_type, parameters, None)

    def test_ndim_error(self):
        """A parameter array with too few dimensions must be rejected."""
        parameters = self.dataset.parameters.copy()
        with pytest.raises(AssertionError):
            SSUResult(self.dataset[0], self.dataset.distribution_type, parameters[0], 1.0)

    def test_n_parameters_error(self):
        """A distribution type whose parameter count mismatches the array
        must be rejected (dataset was generated with a different type)."""
        parameters = self.dataset.parameters.copy()
        with pytest.raises(AssertionError):
            SSUResult(self.dataset[0], DistributionType.Normal, parameters, 1.0)

    def test_index_error(self):
        """Only integer/slice indexing is supported; other keys raise TypeError."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        with pytest.raises(TypeError):
            result["C1"]
        with pytest.raises(TypeError):
            result[:, 0]

    def test_apis(self):
        # `ArtificialSample` has similar apis with `SSUResult`, to make them can be used in plotting charts
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        apis = [
            "name", "classes", "classes_phi", "distribution", "sample",
            ("sample", "distribution"), "is_valid"]
        component_apis = [
            "classes", "classes_phi", "distribution", "proportion", "mean",
            "sorting_coefficient", "skewness", "kurtosis"]
        for api in apis:
            if isinstance(api, tuple):
                api, sub_api = api
                # FIX: the original discarded these hasattr results, so the
                # nested-attribute checks never asserted anything.
                assert hasattr(getattr(result, api), sub_api)
                assert hasattr(getattr(as0, api), sub_api)
            assert hasattr(result, api)
            assert hasattr(as0, api)
        for api in component_apis:
            # FIX: these hasattr calls were also missing their asserts.
            for component in result:
                assert hasattr(component, api)
            for component in as0:
                assert hasattr(component, api)

    def test_history(self):
        """Iterating the history yields fresh ``SSUResult`` snapshots and
        must not mutate the final result's distribution data."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        result_bytes = result.distribution.tobytes()
        for h in result.history:
            assert isinstance(h, SSUResult)
            assert h is not result
        # will not modify the data of original object
        assert result.distribution.tobytes() == result_bytes

    def test_loss(self):
        """Every built-in loss evaluates to a plain float."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        for name in built_in_losses:
            loss = result.loss(name)
            assert isinstance(loss, float)

    def test_loss_series(self):
        """Per-iteration loss series has one value per iteration."""
        as0 = self.dataset[0]
        result = SSUResult(as0, self.dataset.distribution_type, self.dataset.parameters, 1.0)
        for name in built_in_losses:
            loss_series = result.loss_series(name)
            assert isinstance(loss_series, ndarray)
            assert len(loss_series) == result.n_iterations
class TestTryEMMA:
    """Tests for ``try_emma``: basic fitting, warm starts (``x0``), device
    selection, progress callbacks, result properties, history, and losses."""

    # Shared fixture dataset; x0 holds the preset component means transposed
    # to (n_components, n_members), trimmed of the first/last boundary rows.
    dataset = random_dataset(**SIMPLE_PRESET, n_samples=100)
    x0 = np.array([[mean for (mean, std) in component] for component in SIMPLE_PRESET["target"]]).T
    x0 = x0[1:-1]

    @classmethod
    def log_message(cls, result: EMMAResult):
        """Print a human-readable summary of a finished EMMA fitting task."""
        print(
            "\n",
            f"The fitting task [{result.n_samples}, {result.n_members}, {result.kernel_type.name}] of "
            f"dataset [{result.dataset.name}] was finished using {result.n_iterations} iterations and "
            f"{result.time_spent:.2f} s.\nFitting settings: {result.settings}.\n"
            f"MSE: {result.loss('mse')}, LMSE: {result.loss('lmse')}, angular: {result.loss('angular')}.\n",
            sep="", end="\n")

    def test_one(self):
        """Plain fit with default settings returns an ``EMMAResult``."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components)
        self.log_message(result)
        assert isinstance(result, EMMAResult)

    def test_has_x0(self):
        """Fitting accepts an initial guess plus pretraining epochs."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                          x0=self.x0, pretrain_epochs=100)
        self.log_message(result)
        assert isinstance(result, EMMAResult)

    def test_cuda(self):
        """Fitting runs on the default CUDA device (requires a GPU)."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                          x0=self.x0, device="cuda", pretrain_epochs=100)
        self.log_message(result)
        assert isinstance(result, EMMAResult)

    def test_cuda0(self):
        """An explicit device index ("cuda:0") is accepted."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                          x0=self.x0, device="cuda:0", pretrain_epochs=100)
        self.log_message(result)
        assert isinstance(result, EMMAResult)

    def test_no_device(self):
        """A nonexistent device ("cuda:1") must be rejected."""
        with pytest.raises(AssertionError):
            # FIX: dropped the pointless `result =` binding — the call is
            # expected to raise, so the name could never be used.
            try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                     x0=self.x0, device="cuda:1", pretrain_epochs=100)

    def test_progress_callback(self):
        """The progress callback receives values in [0, 1]."""
        def callback(p: float):
            assert 0.0 <= p <= 1.0
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                          pretrain_epochs=100, progress_callback=callback)
        # FIX: the original discarded `result` without checking it, so the
        # test could pass even if the fit returned garbage.
        assert isinstance(result, EMMAResult)

    def test_result_properties(self):
        """The result exposes all documented public properties."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components)
        properties = [
            "dataset", "n_samples", "n_members", "n_classes", "n_iterations",
            "kernel_type", "proportions", "end_members", "time_spent",
            "x0", "history", "settings"]
        for prop in properties:
            assert hasattr(result, prop)

    def test_no_history(self):
        """With ``need_history=False`` only the final iteration is kept."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components,
                          need_history=False)
        assert result.n_iterations == 1

    def test_history(self):
        """History snapshots are independent copies; iterating them must not
        mutate the final result's arrays."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components)
        proportions_bytes = result.proportions.tobytes()
        end_members_bytes = result.end_members.tobytes()
        for h in result.history:
            assert isinstance(h, EMMAResult)
            assert h is not result
        # will not modify the data of original object
        assert result.proportions.tobytes() == proportions_bytes
        assert result.end_members.tobytes() == end_members_bytes

    def test_loss(self):
        """Loss series are ndarrays sized per-iteration, per-class, per-sample."""
        result = try_emma(self.dataset, KernelType.Normal, self.dataset.n_components)
        for loss_name in built_in_losses:
            loss_series = result.loss_series(loss_name)
            assert isinstance(loss_series, np.ndarray)
            assert len(loss_series) == result.n_iterations
            class_wise_losses = result.class_wise_losses(loss_name)
            assert isinstance(class_wise_losses, np.ndarray)
            assert len(class_wise_losses) == result.n_classes
            sample_wise_losses = result.sample_wise_losses(loss_name)
            assert isinstance(sample_wise_losses, np.ndarray)
            assert len(sample_wise_losses) == result.n_samples

    def test_all_kernels(self):
        """Every supported kernel type fits without error."""
        for kernel_type in [
                KernelType.Nonparametric, KernelType.Normal, KernelType.SkewNormal,
                KernelType.Weibull, KernelType.GeneralWeibull]:
            result = try_emma(self.dataset, kernel_type, self.dataset.n_components)
            # FIX: the original never checked the result, so a kernel that
            # silently returned a wrong type would still pass.
            assert isinstance(result, EMMAResult)
from QGrain.models import DistributionType
from QGrain.generate import random_dataset

# Three skew-normal components; each row is a component given as
# (value, sampling-std) pairs for the generator's parameters.
preset = {
    "target": [
        [(0.0, 0.0), (10.2, 0.0), (1.1, 0.0), (1.0, 0.1)],
        [(0.0, 0.0), (7.5, 0.0), (1.2, 0.0), (2.0, 0.2)],
        [(0.0, 0.0), (5.0, 0.0), (1.0, 0.0), (2.5, 0.5)],
    ],
    "distribution_type": DistributionType.SkewNormal,
}

# Build a synthetic dataset of 100 samples on a 101-class grain-size grid
# spanning 0.02–2000 µm, with 4-digit precision and additive noise level 5.
dataset = random_dataset(
    **preset,
    n_samples=100,
    min_size=0.02,
    max_size=2000.0,
    n_classes=101,
    precision=4,
    noise=5,
)
class TestTrySSU:
    """Tests for ``try_ssu``: single-sample fitting with/without history,
    warm starts, global optimization, batch fitting via ``try_dataset``,
    and sweeps over all built-in losses and optimizers."""

    # Shared fixture: small dataset (10 samples) to keep the fits fast.
    # x0 holds the preset component means transposed to
    # (n_parameters, n_components); x0[1:] drops the first parameter row.
    dataset = random_dataset(**SIMPLE_PRESET, n_samples=10)
    x0 = np.array([[mean for (mean, std) in component] for component in SIMPLE_PRESET["target"]]).T
    x0 = x0[1:]

    @classmethod
    def log_message(cls, result: SSUResult, message: str):
        """Print a summary comparing target vs. estimated component statistics
        (mean Mz, sorting So, proportion p) for a finished SSU fit."""
        print("\n",
              f"The fitting task [{len(result)}, {result.distribution_type.name}] of sample [{result.name}] "
              f"was finished using {result.n_iterations} iterations, message: {message}.\n"
              f"MSE: {result.loss('mse')}, LMSE: {result.loss('lmse')}, angular: {result.loss('angular')}.\n"
              f"Target Mz: ({', '.join([f'{c.mean:.2f}' for c in result.sample])}), "
              f"Estimated Mz: ({', '.join([f'{c.mean:.2f}' for c in result])}).\n"
              f"Target So: ({', '.join([f'{c.sorting_coefficient:.2f}' for c in result.sample])}), "
              f"Estimated So: ({', '.join([f'{c.sorting_coefficient:.2f}' for c in result])}).\n"
              f"Target p: ({', '.join([f'{c.proportion:.2f}' for c in result.sample])}), "
              f"Estimated p: ({', '.join([f'{c.proportion:.2f}' for c in result])}).",
              sep="", end="\n")

    def test_one(self):
        """A plain fit with the lmse loss returns an ``SSUResult``."""
        result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                  loss="lmse")
        self.log_message(result, message)
        assert isinstance(result, SSUResult)

    def test_no_history(self):
        """With ``need_history=False`` only the final iteration is kept."""
        result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                  loss="lmse", need_history=False)
        self.log_message(result, message)
        assert isinstance(result, SSUResult)
        assert result.n_iterations == 1

    def test_has_x0(self):
        """Fitting accepts an explicit initial parameter guess."""
        result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                  x0=self.dataset.parameters[0, 1:, :], loss="lmse")
        self.log_message(result, message)
        assert isinstance(result, SSUResult)

    def test_try_global(self):
        """Global optimization mode also yields a valid result."""
        result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                  try_global=True)
        self.log_message(result, message)
        assert isinstance(result, SSUResult)

    def test_all_samples(self):
        """Every sample in the dataset fits successfully with the shared x0."""
        for i, sample in enumerate(self.dataset):
            result, message = try_ssu(sample, DistributionType.Normal, self.dataset.n_components,
                                      x0=self.x0)
            assert isinstance(result, SSUResult)

    def test_try_dataset(self):
        """Batch fitting across 4 processes completes and reports counts."""
        # NOTE(review): this test only prints counts and asserts nothing about
        # `results`/`failed_indexes` — consider adding assertions; TODO confirm
        # the intended contract of try_dataset before tightening.
        options = dict(x0=self.x0)
        results, failed_indexes = try_dataset(self.dataset, DistributionType.Normal,
                                              self.dataset.n_components, n_processes=4, options=options)
        print("\n", "Using try_dataset to fit all samples", len(results), len(failed_indexes))

    def test_all_losses(self):
        """Every built-in loss either fits (logged) or reports its message."""
        for loss in built_in_losses:
            result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                      x0=self.x0, loss=loss)
            if isinstance(result, SSUResult):
                print(loss)
                self.log_message(result, message)
            else:
                # Some losses may legitimately fail to converge; log why.
                print("\n", loss, message, end="\n")

    def test_all_optimizers(self):
        """Every built-in optimizer either fits (logged) or reports its message."""
        for optimizer in built_in_optimizers:
            result, message = try_ssu(self.dataset[0], DistributionType.Normal, self.dataset.n_components,
                                      x0=self.x0, loss="rmse", optimizer=optimizer)
            if isinstance(result, SSUResult):
                print(optimizer)
                self.log_message(result, message)
            else:
                # Some optimizers may legitimately fail to converge; log why.
                print("\n", optimizer, message, end="\n")
('Fine', 'Silt'): 0.11250000000000002, ('Very fine', 'Silt'): 0.0868, ('Very coarse', 'Clay'): 0.0669, ('Coarse', 'Clay'): 0.052, ('Medium', 'Clay'): 0.0251, ('Fine', 'Clay'): 0.006, ('Very fine', 'Clay'): 0.0007999999999999999}, 'group_folk54': 'Slit', '_group_bp12_symbols': ['(s)', '(c)', 'SI'], 'group_bp12_symbol': '(s)(c)SI', 'group_bp12': 'Slightly Sandy Slightly Clayey Silt'} from QGrain.generate import SIMPLE_PRESET, random_dataset from QGrain.io import save_statistics dataset = random_dataset(**SIMPLE_PRESET, n_samples=200) save_statistics(dataset, "./Statistics.xlsx") from QGrain.generate import SIMPLE_PRESET, random_sample from QGrain.statistics import * sample = random_sample(**SIMPLE_PRESET) # statistical parameters s = arithmetic(sample.classes, sample.distribution) s = geometric(sample.classes, sample.distribution) s = logarithmic(sample.classes_phi, sample.distribution) ppf = reversed_phi_ppf(sample.classes_phi, sample.distribution) s = geometric_fw57(ppf) s = logarithmic_fw57(ppf) # proportions