def __init__(self, config_filepath):
    """
    Load precomputed M2L operators and their index-to-key lookup tables.

    Parameters
    ----------
    config_filepath : str
        Path to a JSON configuration file. Required — the previous docstring
        described an optional ``config_filename`` defaulting to
        ``config.json``, which did not match this signature.
    """
    self.config = data.load_json(config_filepath)
    self.m2l_dirpath = ROOT / self.config["operator_dirname"]

    # Discover the per-level operator and lookup files written to disk.
    operator_files = self.m2l_dirpath.glob('m2l_level*')
    index_to_key_files = self.m2l_dirpath.glob('index*')

    # Containers are keyed by octree level, starting at 2 up to the
    # configured maximum; levels with no file on disk remain None.
    levels = range(2, self.config['octree_max_level'] + 1)
    self.operators = {level: None for level in levels}
    self.index_to_key = {level: None for level in levels}

    for filename in operator_files:
        level = self.get_level(str(filename))
        self.operators[level] = data.load_pickle(
            f'm2l_level_{level}', self.m2l_dirpath
        )

    for filename in index_to_key_files:
        level = self.get_level(str(filename))
        self.index_to_key[level] = data.load_pickle(
            f'index_to_key_level_{level}', self.m2l_dirpath
        )
def main():
    """
    Entry point: load a corpus, preprocess it, cluster it, and write the
    cluster labels back out as ``<input basename>_labeled.json``.
    """
    import os  # local: used only for robust output-path construction

    args = get_parser().parse_args()
    config = Config.from_file(args.config)
    logger = get_logger(config.output_path)
    logger.info(args)

    logger.info("=> Starting evaluation ...")
    logger.info("Load data")
    corpus = io.load_json(config.input_path, append_title=config.use_title)

    logger.info("Perform preprocessing")
    preprocessed_corpus = Preprocessing(
        corpus["keywords"],
        config=config.preprocessing,
        datatype="keywords",
        logger=logger,
    ).apply_preprocessing()
    preprocessed_corpus["token"] = preprocessed_corpus["token"].apply(flatten)
    preprocessed_corpus.drop("abstract", axis=1, inplace=True)

    logger.info("Start clustering")
    clustering = Clustering(
        preprocessed_corpus,
        clustering_config=config.clustering,
        dim_reduction_config=config.dim_reduction,
        logger=logger,
    )
    model = clustering.perform_clustering()

    logger.info(f"Save results to {config.output_path}")
    corpus["label"] = model.labels_
    # BUG FIX: the previous `config.input_path.split(".")[0]` truncated at the
    # FIRST dot anywhere in the path (e.g. './data/x.json' -> ''), not at the
    # extension. splitext strips only the final extension.
    # NOTE(review): the log line above mentions output_path, but the file is
    # written next to input_path — confirm which is intended.
    base, _ = os.path.splitext(config.input_path)
    io.write_json(base + "_labeled.json", corpus)
def __init__(self, config_filename=None):
    """
    Assemble everything an FMM run needs: configuration, particle data,
    precomputed operators, and the octree plus per-node result containers.

    Parameters
    ----------
    config_filename : None/str
        Name of a JSON config file, resolved relative to PARENT.
        Falls back to 'config.json' when omitted.
    """
    cfg_path = PARENT / (config_filename if config_filename is not None else "config.json")
    self.config = data.load_json(cfg_path)

    data_dir = PARENT / self.config["data_dirname"]
    op_dir = PARENT / self.config["operator_dirname"]
    src_name = self.config['source_filename']
    tgt_name = self.config['target_filename']
    dens_name = self.config['source_densities_filename']

    # Surface discretisation, particles and their densities
    self.surface = data.load_hdf5_to_array('surface', 'surface', op_dir)
    self.sources = data.load_hdf5_to_array(src_name, src_name, data_dir)
    self.targets = data.load_hdf5_to_array(tgt_name, tgt_name, data_dir)
    self.source_densities = data.load_hdf5_to_array(dens_name, dens_name, data_dir)

    # Precomputed translation operators
    self.uc2e_u = data.load_hdf5_to_array('uc2e_u', 'uc2e_u', op_dir)
    self.uc2e_v = data.load_hdf5_to_array('uc2e_v', 'uc2e_v', op_dir)
    self.m2m = data.load_hdf5_to_array('m2m', 'm2m', op_dir)
    self.l2l = data.load_hdf5_to_array('l2l', 'l2l', op_dir)
    self.m2l = data.load_pickle('m2l_compressed', op_dir)

    # Run parameters taken from configuration
    self.maximum_level = self.config['octree_max_level']
    self.kernel_function = kernel.KERNELS[self.config['kernel']]()
    self.order = self.config['order']
    self.octree = octree.Octree(
        self.sources, self.targets, self.maximum_level, self.source_densities
    )

    # Coefficients discretising surface of a node
    self.ncoefficients = 6 * (self.order - 1)**2 + 2

    # Containers for per-target potentials and per-node expansion data
    self.result_data = [
        density.Potential(target, np.zeros(1, dtype='float64'))
        for target in self.octree.targets
    ]
    self.source_data = {
        key: node.Node(key, self.ncoefficients)
        for key in self.octree.non_empty_source_nodes
    }
    self.target_data = {
        key: node.Node(key, self.ncoefficients)
        for key in self.octree.non_empty_target_nodes
    }
# NOTE(review): the statements before the __main__ guard reference `dtype`,
# `npoints`, and `data_dirpath`, which are not defined at module scope here —
# they appear to be the tail of a `main(...)` defined earlier in the full
# file; confirm before reformatting this chunk.
# NOTE(review): the first ValueError message contains a backslash line
# continuation inside the f-string, which leaves a run of literal whitespace
# in the rendered message; the second message is missing a space in
# "either`separated`". Also, the first message needs no f-prefix (no
# placeholders). Worth fixing once the enclosing context is visible.
data_func = DATA[dtype] sources, targets, source_densities = data_func(npoints) data.save_array_to_hdf5(data_dirpath, 'sources', sources) data.save_array_to_hdf5(data_dirpath, 'targets', targets) data.save_array_to_hdf5(data_dirpath, 'source_densities', source_densities) if __name__ == "__main__": if len(sys.argv) < 4: raise ValueError( f'Must Specify Config Filepath number of points and data type!\ e.g. `python generate_test_data.py /path/to/config.json 100 random`' ) elif sys.argv[3] not in DATA.keys(): raise ValueError( f'Data type `{sys.argv[3]}` not valid. Must be either`separated` or `random`' ) else: config_filepath = sys.argv[1] npoints = sys.argv[2] dtype = sys.argv[3] config = data.load_json(config_filepath) config['npoints'] = int(npoints) config['dtype'] = dtype main(**config)
import os  # BUG FIX: os.path is used below but os was never imported (NameError)
import pathlib

import numpy as np
import pytest

import fmm.hilbert as hilbert
from fmm.kernel import KERNELS
from fmm.octree import Octree
import fmm.operator as operator
import utils.data as data

# Paths are resolved relative to this test module's location on disk.
HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
ROOT = HERE.parent.parent
CONFIG_FILEPATH = HERE.parent.parent / "test_config.json"
CONFIG = data.load_json(CONFIG_FILEPATH)

ORDER = CONFIG['order']
SURFACE = operator.compute_surface(ORDER)
KERNEL_FUNCTION = KERNELS['laplace']()

OPERATOR_DIRPATH = HERE.parent.parent / CONFIG['operator_dirname']
DATA_DIRPATH = HERE.parent.parent / CONFIG['data_dirname']

# Relative tolerance used by the assertions in this module.
RTOL = 1e-1


@pytest.fixture
def octree():
    # NOTE(review): fixture body continues beyond this chunk in the full file.
    sources = data.load_hdf5_to_array('sources', 'sources', DATA_DIRPATH)
    targets = data.load_hdf5_to_array('targets', 'targets', DATA_DIRPATH)
import os
import pathlib
import subprocess

import click

from utils.data import load_json

HELP_TEXT = """ Command Line Interface for PyExaFMM """

# Project config lives one directory above this module.
HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
CONFIG = load_json(HERE.parent / 'config.json')


@click.group(help=HELP_TEXT)
def cli():
    """Root command group; subcommands are registered onto it."""
    pass


@click.command(help='Build dev version in current python env')
def build():
    """Install the package into the active conda env in development mode."""
    click.echo('Building and installing')
    subprocess.run(['conda', 'develop', '.'])


@click.command(help='Run test suite')
def test():
    """Run pytest over the fmm package."""
    click.echo('Running test suite')
    subprocess.run(['pytest', 'fmm'])