def construct_datasets(self, tag_xyz_map):
        """Build the prediction datasets for every tag selected in the config.

        Each entry of ``dataset_config.preprocesses`` is instantiated and
        loaded from ``<prediction_config.load_dir>/preprocess/<name>.npz``.
        Then, for every pattern in ``prediction_config.tags``, each key of
        ``tag_xyz_map`` that matches it (``fnmatch`` rules) is turned into
        one ``HDNNPDataset``.

        Side effects:
            * matched entries are popped from ``tag_xyz_map``;
            * the descriptor of each matched tag is written to
              ``<load_dir>/<descriptor>-pred.npz``;
            * ``dataset_config.n_sample`` grows by each dataset's
              ``total_size``;
            * ``model_config.n_input`` / ``n_output`` are overwritten with
              the values of the most recently built dataset.

        Args:
            tag_xyz_map: Mapping from tag to the value handed to
                ``AtomicStructure.read_xyz`` (presumably an xyz file path
                -- confirm against the caller).

        Returns:
            list: The constructed datasets, in the order they were matched.
        """
        dataset_cfg = self.dataset_config
        model_cfg = self.model_config
        predict_cfg = self.prediction_config

        # Restore the saved state of every configured preprocess.
        loaded_preprocesses = []
        for entry in dataset_cfg.preprocesses:
            name, args, kwargs = entry
            step = PREPROCESS[name](*args, **kwargs)
            npz_path = predict_cfg.load_dir / 'preprocess' / f'{name}.npz'
            step.load(npz_path, verbose=self.verbose)
            loaded_preprocesses.append(step)

        # Lazy on purpose: a pattern is matched against the map only after
        # the tags of the previous patterns have been popped from it.
        matching_tags = (
            tag
            for pattern in predict_cfg.tags
            for tag in fnmatch.filter(tag_xyz_map, pattern)
        )

        built = []
        for tag in matching_tags:
            if self.verbose:
                pprint(f'Construct sub dataset tagged as "{tag}"')
            xyz_source = tag_xyz_map.pop(tag)
            structures = AtomicStructure.read_xyz(xyz_source)

            # Descriptor dataset: always computed from scratch here.
            descriptor_cls = DESCRIPTOR_DATASET[dataset_cfg.descriptor]
            descriptor = descriptor_cls(
                predict_cfg.order, structures, **dataset_cfg.parameters)
            descriptor.make(verbose=self.verbose)

            # Save the descriptor (symmetry function).
            # NOTE(review): the file name does not contain the tag, so every
            # matched tag writes to the same path -- confirm this is wanted.
            pred_npz = predict_cfg.load_dir / f'{dataset_cfg.descriptor}-pred.npz'
            descriptor.save(pred_npz, verbose=self.verbose)

            # Property dataset: only instantiated, neither made nor loaded.
            property_cls = PROPERTY_DATASET[dataset_cfg.property_]
            property_ = property_cls(predict_cfg.order, structures)

            # Combine descriptor & property datasets into the final dataset.
            dataset = HDNNPDataset(descriptor, property_)
            dataset.construct(
                all_elements=predict_cfg.elements,
                preprocesses=loaded_preprocesses,
                shuffle=False,
                verbose=self.verbose,
            )
            built.append(dataset)

            # Publish the sizes on the shared config objects.
            dataset_cfg.n_sample += dataset.total_size
            model_cfg.n_input = dataset.n_input
            model_cfg.n_output = dataset.n_label

        return built
Exemplo n.º 2
0
    def construct_test_datasets(self, tag_xyz_map, load_descriptor):
        """Build the test datasets for every tag selected in the config.

        Each entry of ``dataset_config.preprocesses`` is instantiated and
        loaded from ``<training_config.out_dir>/preprocess/<name>.npz``,
        where ``<name>`` is the ``name`` attribute of the preprocess
        instance.  Then, for every pattern in ``training_config.tags``,
        each key of ``tag_xyz_map`` that matches it (``fnmatch`` rules) is
        turned into one ``HDNNPDataset``, which is also ``scatter()``-ed.

        Descriptor and property arrays are cached next to the xyz source
        as ``<descriptor>-test.npz`` and ``<property>-test.npz``.

        Side effects:
            * matched entries are popped from ``tag_xyz_map``;
            * cache files are written whenever a dataset is (re)made;
            * ``dataset_config.n_sample`` grows by each dataset's
              ``total_size``;
            * ``model_config.n_input`` / ``n_output`` are overwritten with
              the values of the most recently built dataset.

        Args:
            tag_xyz_map: Mapping from tag to the xyz source; the value must
                provide ``with_name`` (presumably a ``pathlib.Path`` --
                confirm against the caller).
            load_descriptor: If truthy, the descriptor is loaded from its
                cache file instead of being computed, and the property is
                loaded too when its cache file exists.

        Returns:
            list: The constructed datasets, in the order they were matched.
        """
        dataset_cfg = self.dataset_config
        model_cfg = self.model_config
        train_cfg = self.training_config
        preprocess_root = train_cfg.out_dir / 'preprocess'

        # Restore the saved state of every configured preprocess.
        loaded_preprocesses = []
        for entry in dataset_cfg.preprocesses:
            name, args, kwargs = entry
            step = PREPROCESS[name](*args, **kwargs)
            step.load(preprocess_root / f'{step.name}.npz',
                      verbose=self.verbose)
            loaded_preprocesses.append(step)

        # Lazy on purpose: a pattern is matched against the map only after
        # the tags of the previous patterns have been popped from it.
        matching_tags = (
            tag
            for pattern in train_cfg.tags
            for tag in fnmatch.filter(tag_xyz_map, pattern)
        )

        built = []
        for tag in matching_tags:
            if self.verbose:
                pprint(f'Construct sub test dataset tagged as "{tag}"')
            xyz_source = tag_xyz_map.pop(tag)
            structures = AtomicStructure.read_xyz(xyz_source)

            # Descriptor dataset: reuse the cache only on explicit request.
            descriptor_cls = DESCRIPTOR_DATASET[dataset_cfg.descriptor]
            descriptor = descriptor_cls(
                self.loss_function.order['descriptor'],
                structures,
                **dataset_cfg.parameters)
            descriptor_cache = xyz_source.with_name(
                f'{dataset_cfg.descriptor}-test.npz')
            if not load_descriptor:
                descriptor.make(verbose=self.verbose)
                descriptor.save(descriptor_cache, verbose=self.verbose)
            else:
                descriptor.load(descriptor_cache,
                                verbose=self.verbose,
                                remake=dataset_cfg.remake)

            # Property dataset: the cache is reused only if the file exists
            # and loading was requested; otherwise it is made and saved.
            property_cls = PROPERTY_DATASET[dataset_cfg.property_]
            property_ = property_cls(
                self.loss_function.order['property'], structures)
            property_cache = xyz_source.with_name(
                f'{dataset_cfg.property_}-test.npz')
            reuse_property = property_cache.exists() and load_descriptor
            if reuse_property:
                property_.load(property_cache,
                               verbose=self.verbose,
                               remake=dataset_cfg.remake)
            else:
                property_.make(verbose=self.verbose)
                property_.save(property_cache, verbose=self.verbose)

            # Combine descriptor & property datasets into the final dataset.
            dataset = HDNNPDataset(descriptor, property_)
            dataset.construct(
                all_elements=train_cfg.elements,
                preprocesses=loaded_preprocesses,
                shuffle=False,
                verbose=self.verbose,
            )
            dataset.scatter()
            built.append(dataset)

            # Publish the sizes on the shared config objects.
            dataset_cfg.n_sample += dataset.total_size
            model_cfg.n_input = dataset.n_input
            model_cfg.n_output = dataset.n_label

        return built