class FileSampler():
    """Run a workflow once for every row of a pyposmat data file.

    Each row of ``self.data.df`` supplies a ``sim_id`` and one value per
    name in ``self.configuration.parameter_names``.  The workflow is run
    inside a new directory named after the ``sim_id``.
    """

    def __init__(self,
                 configuration,
                 data,
                 structure_name,
                 structure_path,
                 workflow_type,
                 workflow_definition):
        """
        Args:
            configuration (str or PyposmatConfigurationFile): path to a
                configuration file, or an already loaded object.
            data (str or PyposmatDataFile): path to a data file, or an
                already loaded object.
            structure_name: forwarded to the workflow as ``structure_name``.
            structure_path: forwarded to the workflow as ``structure_path``.
            workflow_type (str): ``run`` only handles
                ``'lmps_thermal_expansion'``.
            workflow_definition (dict): extra keyword arguments forwarded to
                the workflow constructor.
        """
        self.initialize_configuration(configuration)
        self.initialize_data(data)

        self.structure_name = structure_name
        self.structure_path = structure_path
        # Earlier versions only set this misspelled attribute; it is kept as
        # an alias so existing readers of it keep working.
        self.strucutre_path = structure_path

        self.workflow_type = workflow_type
        self.workflow_definition = workflow_definition
        self.potential_definition = self.configuration.potential

    def initialize_configuration(self, configuration):
        """Set ``self.configuration`` from an object or a file path.

        Raises:
            TypeError: if ``configuration`` is neither a
                PyposmatConfigurationFile nor a str.
        """
        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
        elif isinstance(configuration, str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        else:
            msg = ("configuration must be a path to a configuration file or an "
                   "instance of the PyposmatConfigurationFile,")
            raise TypeError(msg)

    def initialize_data(self, data):
        """Set ``self.data`` from an object or a file path.

        Raises:
            TypeError: if ``data`` is neither a PyposmatDataFile nor a str.
        """
        if isinstance(data, PyposmatDataFile):
            self.data = data
        elif isinstance(data, str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            msg = ("data must be a path to a data file or an instance of "
                   "PyposmatDataFile.")
            raise TypeError(msg)

    def run(self):
        """Create, prepare and run one workflow per row of the data file.

        Raises:
            ValueError: if ``self.workflow_type`` is not
                ``'lmps_thermal_expansion'``.
        """
        for _, row in self.data.df.iterrows():
            sim_id = row['sim_id']
            print('working on sim_id:{}'.format(sim_id))

            # Fail before touching the filesystem; previously an unknown
            # workflow type left ``workflow`` unbound.
            if self.workflow_type != 'lmps_thermal_expansion':
                raise ValueError(
                    'unsupported workflow_type:{}'.format(self.workflow_type))

            parameters = OrderedDict(
                [(k, row[k]) for k in self.configuration.parameter_names])

            original_path = os.getcwd()
            os.mkdir(sim_id)
            os.chdir(sim_id)
            try:
                # NOTE(review): structure_path is used after the chdir, so a
                # relative path resolves against the sim_id directory --
                # confirm callers pass an absolute path.
                workflow = LammpsThermalExpansion(
                    structure_name=self.structure_name,
                    structure_path=self.structure_path,
                    **self.workflow_definition)
                workflow.create_task_configurations()
                workflow.create_tasks()
                workflow.prepare_tasks(
                    potential_definition=self.potential_definition,
                    potential_parameters=parameters)
                workflow.run()
            finally:
                # always return to the starting directory, even when a
                # workflow step raises
                os.chdir(original_path)
def initialize_kde_data(self,kde_data_fn=None,o_kde_data=None):
    """Set ``self.kde_data`` from a file path or from an existing object.

    Args:
        kde_data_fn (str, optional): path of the kde data file to read.
        o_kde_data (PyposmatDataFile, optional): already loaded data; when
            used, ``self.kde_data_fn`` is set to None.

    With neither argument the file named by the existing
    ``self.kde_data_fn`` attribute is read, so that attribute must already
    be set.

    Raises:
        TypeError: if both arguments are given, or if the single argument
            given has an unsupported type.
    """
    # The two adjacent literals used to be joined without a space
    # ("PyposmatDataFileinstance") and repeated the word "to".
    m = (
        "must either provide the path to kde_data_fn or a PyposmatDataFile "
        "instance to o_kde_data"
    )

    if kde_data_fn is not None and o_kde_data is not None:
        raise TypeError(m)

    if kde_data_fn is None and o_kde_data is None:
        # default behavior: read the previously stored path
        self.kde_data = PyposmatDataFile()
        self.kde_data.read(filename=self.kde_data_fn)
    elif isinstance(kde_data_fn,str):
        # a path is provided
        self.kde_data_fn = kde_data_fn
        self.kde_data = PyposmatDataFile()
        self.kde_data.read(filename=self.kde_data_fn)
    elif isinstance(o_kde_data,PyposmatDataFile):
        # an object is provided
        self.kde_data_fn = None
        self.kde_data = o_kde_data
    else:
        raise TypeError(m)
def write_kde_file(self,filename):
    """Write the surviving rows of the results file to ``filename``.

    The file named by ``self.results_data_fn`` is read into a new
    PyposmatDataFile stored on ``self.kde_data``.  Only the rows at the
    positions listed in ``self.filter_set_info['is_survive_idx']`` are kept
    before writing.

    Args:
        filename (str): path of the file to write.
    """
    # The previous version also built a ``names`` list here that was never
    # used; it has been dropped.
    self.kde_data = PyposmatDataFile()
    self.kde_data.read(filename=self.results_data_fn)

    survivor_positions = list(self.filter_set_info['is_survive_idx'])
    self.kde_data.df = self.kde_data.df.iloc[survivor_positions]
    self.kde_data.write(filename=filename)
def test__write_simulation_results__no_filename():
    """Write a header and one result row, then re-read the output file.

    ``write_simulation_results`` is called without a filename, after the
    header was written to ``datafile_out_fn``.
    """
    cleanup_test()

    parameter_names = ['param{}'.format(n) for n in range(1, 4)]
    qoi_names = ['qoi{}'.format(n) for n in range(1, 6)]
    error_names = ['err{}'.format(n) for n in range(1, 6)]

    datafile = PyposmatDataFile()
    datafile.write_header_section(filename=datafile_out_fn,
                                  parameter_names=parameter_names,
                                  qoi_names=qoi_names,
                                  error_names=error_names)

    # one constant value per section: 1 for parameters, 2 for qois, 3 for errors
    results = OrderedDict([
        ('parameters', OrderedDict((name, 1.) for name in parameter_names)),
        ('qois', OrderedDict((name, 2.) for name in qoi_names)),
        ('errors', OrderedDict((name, 3.0) for name in error_names)),
    ])
    sim_id = "test_id"
    datafile.write_simulation_results(sim_id, results)

    assert os.path.isfile(datafile_out_fn)

    # reading the file back must not raise
    datafile_read = PyposmatDataFile()
    datafile_read.read(filename=datafile_out_fn)
def test__attribute__names__after_reading_file():
    """After reading a data file, ``names`` is exactly a ``list``."""
    culled_fn = "../../../../../data/MgO_pareto_data/culled_004.out"

    o_data = PyposmatDataFile()
    o_data.read(culled_fn)

    names_type = type(o_data.names)
    assert names_type is list
def dev__get_descriptive_statistics__from_kde_file():
    """Print descriptive statistics computed from the testing set's kde file.

    Development helper: prints a banner, the formatted statistics, and the
    shape of the kde DataFrame.
    """
    print('-' * 80)
    print('{:^80}'.format(
        'method -> get_descriptive_statistics__from_kde_file'))

    testing_set = get_testing_set()
    config_fn = testing_set['config_fn']
    results_data_fn = testing_set['results_fn']
    kde_data_fn = testing_set['kde_fn']

    # all three input files must exist before anything is loaded
    for required_fn in (config_fn, results_data_fn, kde_data_fn):
        assert os.path.isfile(required_fn)

    analyzer = PyposmatDataAnalyzer(config_fn=config_fn,
                                    results_data_fn=results_data_fn)

    kde_data = PyposmatDataFile()
    kde_data.read(filename=kde_data_fn)

    statistics = analyzer.get_descriptive_statistics(df=kde_data.df)
    report = analyzer.str__descriptive_statistics(
        descriptive_statistics=statistics)
    print(report)
    print(kde_data.df.shape)
def read_data(self, filename):
    """Read a data file, print its column names, and build sub-DataFrames.

    Sets ``datafile``, the name lists (both the long ``parameter_names`` /
    ``error_names`` and the short ``param_names`` / ``err_names`` spellings,
    plus ``qoi_names``), and the DataFrames ``param_df``, ``qoi_df``,
    ``err_df`` and ``total_df``.

    Args:
        filename (str): path of the data file to read.
    """
    self.datafile = PyposmatDataFile()
    self.datafile.read(filename=filename)

    # every attribute gets its own list copy of the names
    self.parameter_names = list(self.datafile.parameter_names)
    self.param_names = list(self.datafile.parameter_names)
    self.qoi_names = list(self.datafile.qoi_names)
    self.error_names = list(self.datafile.error_names)
    self.err_names = list(self.datafile.error_names)

    sections = (
        ("parameter names", self.param_names),
        ("qoi names", self.qoi_names),
        ("error_names", self.err_names),
    )
    for heading, column_names in sections:
        print(heading)
        print(type(column_names))
        for position, column_name in enumerate(column_names):
            print("{:3} {:<20}".format(position, column_name))

    # generate pandas dataframes
    self.param_df = copy.deepcopy(self.datafile.df[self.param_names])
    self.qoi_df = copy.deepcopy(self.datafile.df[self.qoi_names])
    self.err_df = copy.deepcopy(self.datafile.df[self.err_names])
    frames = [self.param_df, self.qoi_df, self.err_df]
    self.total_df = pd.concat(frames, axis=1)
def read_data(self, filename):
    """Read a data file and expose its DataFrame and name lists on ``self``.

    Args:
        filename (str): path of the data file to read.
    """
    loaded = self.data = PyposmatDataFile()
    loaded.read(filename)

    self.df = loaded.df
    # these are the data file's own objects, not copies
    self.parameter_names = loaded.parameter_names
    self.error_names = loaded.error_names
    self.qoi_names = loaded.qoi_names
def test__read__wo_named_arguments():
    """Read with a positional filename and compare every name list.

    Each of ``names``, ``parameter_names``, ``qoi_names`` and
    ``error_names`` must be a ``list`` that matches the module-level
    expectation element by element; ``df`` must be a DataFrame.
    """
    datafile = PyposmatDataFile()
    datafile.read(MgO_datafile)

    expectations = (
        ('names', expected_names),
        ('parameter_names', parameter_names),
        ('qoi_names', qoi_names),
        ('error_names', error_names),
    )
    for attribute, expected in expectations:
        actual = getattr(datafile, attribute)
        assert type(actual) is list
        assert len(expected) == len(actual)
        for position in range(len(expected)):
            assert expected[position] == actual[position]

    assert type(datafile.df) is pd.DataFrame
def dev__read():
    """Print the ``sim_id`` column of the testing set's results data file."""
    results_fn = get_testing_set()['results_data_fn']

    datafile = PyposmatDataFile()
    datafile.read(filename=results_fn)

    sim_ids = datafile.df['sim_id']
    print(sim_ids)
def set_data(self, data):
    """Set ``self.data`` from a file path or an existing data object.

    Args:
        data (str or PyposmatDataFile): a path is read into a new
            PyposmatDataFile; an object is stored as given, without copying.

    Raises:
        TypeError: if ``data`` is neither a str nor a PyposmatDataFile.
    """
    if isinstance(data, str):
        self.data = PyposmatDataFile()
        self.data.read(data)
    elif isinstance(data, PyposmatDataFile):
        self.data = data
    else:
        # previously a bare TypeError with no explanation
        msg = ("data must be a path to a data file or an instance of "
               "PyposmatDataFile, not {}").format(type(data).__name__)
        raise TypeError(msg)
def initialize_data(self,data):
    """Set ``self.data`` from a PyposmatDataFile or from a file path.

    Args:
        data (PyposmatDataFile or str): an object is stored as given; a
            path is read into a new PyposmatDataFile.

    Raises:
        TypeError: for any other type.
    """
    if isinstance(data,PyposmatDataFile):
        self.data = data
        return

    if isinstance(data,str):
        self.data = PyposmatDataFile()
        self.data.read(filename=data)
        return

    raise TypeError(
        "data must be a path to a data file or an instance of PyposmatDataFile."
    )
def test__read_datafile():
    """``PyposmatParetoRugplot.read_datafile`` exposes a DataFrame at ``data.df``."""
    import pandas as pd
    from pypospack.pyposmat.data import PyposmatDataFile

    # This object is not used afterwards; presumably it only checks that
    # the file can be read on its own.
    reference = PyposmatDataFile()
    reference.read(filename=datafile_fn)

    rugplot = PyposmatParetoRugplot()
    rugplot.read_datafile(filename=datafile_fn)

    assert type(rugplot.data.df) is pd.DataFrame
def __init__(self, configuration_fn, datafile_fn):
    """Store both paths and load whichever files are given.

    Args:
        configuration_fn (str or None): path of the configuration file;
            when None, ``self.configuration`` stays None.
        datafile_fn (str or None): path of the data file; when None,
            ``self.datafile`` stays None.
    """
    self.configuration_fn = configuration_fn
    self.datafile_fn = datafile_fn

    # Define both attributes up front; previously they did not exist at all
    # when the matching path was None.
    self.configuration = None
    self.datafile = None

    if configuration_fn is not None:
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(configuration_fn)

    if datafile_fn is not None:
        self.datafile = PyposmatDataFile()
        self.datafile.read(filename=datafile_fn)
def read_datafile(self,filename):
    """Read a data file, cache its names and a copy of its DataFrame.

    Calls ``self.create_absolute_errors()`` once everything is loaded.

    Args:
        filename (str): path of the data file to read.
    """
    source = self.datafile = PyposmatDataFile()
    source.read(filename=filename)

    self._parameter_names = source.parameter_names
    self._qoi_names = source.qoi_names
    self._error_names = source.error_names

    # deep copy, so later changes to self._df leave the data file's frame alone
    self._df = copy.deepcopy(source.df)

    self.create_absolute_errors()
def covariance_analysis(data_fn,names):
    """Print the eigen-decomposition of the covariance of selected columns.

    Args:
        data_fn (str): path of the data file to read.
        names (list): column names whose covariance matrix is analysed.
    """
    assert isinstance(data_fn,str)
    assert isinstance(names,list)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    # np.cov treats each row as a variable, hence the transpose
    selected = datafile.df[names]
    eigenvalues, eigenvectors = linalg.eig(np.cov(selected.T))

    print("eigenvalues:\n",eigenvalues)
    print("eigenvectors:\n",eigenvectors)
def test__get_header_string():
    """``get_header_string`` stores the name lists and returns two header lines.

    The first line must contain every column name; the second must contain
    one type label per column.
    """
    parameter_names = ['param{}'.format(n) for n in range(1, 4)]
    qoi_names = ['qoi{}'.format(n) for n in range(1, 6)]
    error_names = ['err{}'.format(n) for n in range(1, 6)]

    datafile = PyposmatDataFile()
    s = datafile.get_header_string(parameter_names=parameter_names,
                                   qoi_names=qoi_names,
                                   error_names=error_names)
    assert type(s) is str

    # check assignment
    assigned = (
        ('parameter_names', parameter_names),
        ('qoi_names', qoi_names),
        ('error_names', error_names),
    )
    for attribute, expected in assigned:
        stored = getattr(datafile, attribute)
        assert type(stored) is list
        assert len(stored) == len(expected)
        for position, name in enumerate(expected):
            assert stored[position] == name

    # check string
    rows = s.split("\n")
    names_row = rows[0].strip().split(",")
    types_row = rows[1].strip().split(",")

    # check line 1
    assert 'sim_id' in names_row
    assert 'cluster_id' not in names_row
    for name in parameter_names + qoi_names + error_names:
        assert name in names_row

    # check line 2
    expected_counts = (
        ('sim_id', 1),
        ('cluster_id', 0),
        ('param', len(parameter_names)),
        ('qoi', len(qoi_names)),
        ('err', len(error_names)),
        ('qoi_v', 0),
        ('err_v', 0),
    )
    for label, count in expected_counts:
        assert types_row.count(label) == count
def read_datafile(self, filename=None):
    """Read the data file and record a copy of its DataFrame and its shape.

    Args:
        filename (str, optional): when given, replaces ``self.data_fn``;
            otherwise the existing ``self.data_fn`` is read.
    """
    if filename is not None:
        self.data_fn = filename

    self.data = PyposmatDataFile()
    self.data.read(self.data_fn)

    # deep copy, so self.df can change without touching self.data.df
    self.df = copy.deepcopy(self.data.df)
    self.data_nrows, self.data_ncols = self.df.shape
def _initialize_data(self, data):
    """Set ``self.data`` and add normalized errors to it.

    Args:
        data (str or PyposmatDataFile): a path must name an existing file
            and is read into a new PyposmatDataFile; an object is
            deep-copied.

    Raises:
        TypeError: if ``data`` is neither a str nor a PyposmatDataFile.
    """
    given_as_path = isinstance(data, str)
    if not given_as_path and not isinstance(data, PyposmatDataFile):
        raise TypeError('data cannot be type:{}'.format(str(type(data))))

    if given_as_path:
        assert os.path.isfile(data)
        self.data = PyposmatDataFile()
        self.data.read(filename=data)
    else:
        # work on a private copy so the caller's object is left untouched
        self.data = deepcopy(data)

    targets = self.configuration.qoi_targets
    self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                       qoi_targets=targets)
def read_datafile(self, filename=None):
    """Read the data file, cache its names and a copy of its DataFrame.

    Calls ``self.create_absolute_errors()`` once everything is loaded.

    Args:
        filename (str, optional): when given, replaces
            ``self.datafile_fn``; otherwise the existing
            ``self.datafile_fn`` is read.
    """
    if filename is not None:
        self.datafile_fn = filename

    source = self.datafile = PyposmatDataFile()
    source.read(filename=self.datafile_fn)

    self._parameter_names = source.parameter_names
    self._qoi_names = source.qoi_names
    self._error_names = source.error_names

    # deep copy, so self.df can change without touching the data file's frame
    self.df = copy.deepcopy(source.df)

    self.create_absolute_errors()
class PyposmatPostProcessorTestHarness(object):
    """Hold a configuration file and a data file loaded from paths."""

    def __init__(self, configuration_fn, datafile_fn):
        """Store both paths and load whichever files are given.

        Args:
            configuration_fn (str or None): path of the configuration file;
                when None, ``self.configuration`` stays None.
            datafile_fn (str or None): path of the data file; when None,
                ``self.datafile`` stays None.
        """
        self.configuration_fn = configuration_fn
        self.datafile_fn = datafile_fn

        # Define both attributes up front; previously they did not exist at
        # all when the matching path was None.
        self.configuration = None
        self.datafile = None

        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(configuration_fn)

        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)

    def get_parameter_names(self):
        """Return ``parameter_names`` of the loaded configuration."""
        return self.configuration.parameter_names
def calculate_kld(data_1_fn,data_2_fn,names,n_samples=2000):
    """Return the divergence between the KDEs of two data files.

    A kernel density estimate is built for each file from the columns in
    ``names``.  Both are passed to ``kullbach_lieber_divergence`` together
    with ``n_samples``.

    Args:
        data_1_fn (str): path of the first data file; must exist.
        data_2_fn (str): path of the second data file; must exist.
        names (list): column names used to build both estimates.
        n_samples (int): forwarded to ``kullbach_lieber_divergence``.

    Returns:
        whatever ``kullbach_lieber_divergence`` returns.
    """
    assert isinstance(data_1_fn,str)
    assert isinstance(data_2_fn,str)
    assert isinstance(n_samples,int)
    assert os.path.isfile(data_1_fn)
    # this used to test data_1_fn a second time, leaving data_2_fn unchecked
    assert os.path.isfile(data_2_fn)

    data_1 = PyposmatDataFile()
    data_1.read(filename=data_1_fn)
    data_2 = PyposmatDataFile()
    data_2.read(filename=data_2_fn)

    # one row per variable, as np.cov and the kde constructors are given
    samples_1 = data_1.df[names].T
    samples_2 = data_2.df[names].T

    w1,v1 = linalg.eig(np.cov(samples_1))
    w2,v2 = linalg.eig(np.cov(samples_2))

    # NOTE(review): if ``linalg`` is scipy.linalg the eigenvalues are
    # complex and ``k < 0`` would raise -- confirm which module is imported.
    cov1_ill_conditioned = any(k < 0 for k in w1.tolist())
    cov2_ill_conditioned = any(k < 0 for k in w2.tolist())

    # a negative eigenvalue in either covariance matrix selects GaussianKde
    # for both data sets
    if cov1_ill_conditioned or cov2_ill_conditioned:
        print('using ill-conditioned kde')
        kde_1 = GaussianKde(samples_1)
        print(kde_1.n, kde_1.d)
        kde_2 = GaussianKde(samples_2)
    else:
        kde_1 = gaussian_kde(samples_1)
        kde_2 = gaussian_kde(samples_2)

    kld = kullbach_lieber_divergence(kde_1,kde_2,n_samples)
    return kld
def test__write_header_section():
    """A header written with ``write_header_section`` reads back unchanged.

    The parameter, qoi and error names recovered from the file must match
    the lists written, in length and element by element.
    """
    cleanup_test()

    parameter_names = ['param{}'.format(i + 1) for i in range(3)]
    qoi_names = ['qoi{}'.format(i + 1) for i in range(5)]
    error_names = ['err{}'.format(i + 1) for i in range(5)]

    datafile = PyposmatDataFile()
    datafile.write_header_section(parameter_names=parameter_names,
                                  qoi_names=qoi_names,
                                  error_names=error_names,
                                  filename=datafile_out_fn)
    assert os.path.isfile(datafile_out_fn)

    datafile_read = PyposmatDataFile()
    datafile_read.read(filename=datafile_out_fn)

    assert len(datafile_read.parameter_names) == len(parameter_names)
    for i, v in enumerate(parameter_names):
        assert datafile_read.parameter_names[i] == v

    assert len(datafile_read.qoi_names) == len(qoi_names)
    for i, v in enumerate(qoi_names):
        assert datafile_read.qoi_names[i] == v

    # This was compared against len(qoi_names), which only passed because
    # both lists happen to hold five names.
    assert len(datafile_read.error_names) == len(error_names)
    for i, v in enumerate(error_names):
        assert datafile_read.error_names[i] == v

    cleanup_test()
def initialize_data(self, data):
    """Set ``self.data`` from a path, an existing object, or None.

    Args:
        data (str, PyposmatDataFile or None): a path is read into a new
            PyposmatDataFile; an object is stored as given; None leaves
            ``self.data`` as None.

    Raises:
        TypeError: if ``data`` is none of the accepted types.
    """
    # A leading ``assert`` used to repeat these type checks.  It raised
    # AssertionError before the TypeError branch could ever run, and it
    # vanished under ``python -O``.  The branches below are now the single
    # source of validation.
    if data is None:
        self.data = None
    elif isinstance(data, str):
        self.data = PyposmatDataFile()
        self.data.read(filename=data)
    elif isinstance(data, PyposmatDataFile):
        self.data = data
    else:
        m = 'data argument must either be path string or a PyposmatDataFile object'
        raise TypeError(m)
def read_data(self, filename):
    """Read a pyposmat data file and expose its contents on ``self``.

    Sets ``data_fn``, ``data``, ``df`` and the three name lists.

    Args:
        filename(str): path of the data file
    """
    self.data_fn = filename

    loaded = self.data = PyposmatDataFile()
    loaded.read(filename)

    # these are the data file's own objects, not copies
    self.parameter_names = loaded.parameter_names
    self.qoi_names = loaded.qoi_names
    self.error_names = loaded.error_names
    self.df = loaded.df
def show_qoi_targets(config_fn, data_fn):
    """Print each qoi target next to the mean of that qoi in the data file.

    Args:
        config_fn (str): path of the configuration file.
        data_fn (str): path of the data file.
    """
    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    row_format = "{:20} {:10} {:10}"
    for qoi_name, qoi_target in o_config.qoi_targets.items():
        try:
            qoi_avg = o_data.df[qoi_name].mean()
        except KeyError:
            # a KeyError from the column lookup is reported, not raised
            qoi_avg = 'no value'
        print(row_format.format(qoi_name,qoi_target,qoi_avg))
def merge_pypospack_datafiles(datafile_fns):
    """Merge several data files into the object read from the first one.

    After each further file is appended, duplicate rows are dropped and the
    index is renumbered.

    Args:
        datafile_fns (list): paths of the data files; the first one supplies
            the returned object.

    Returns:
        PyposmatDataFile: the first file's object with ``df`` replaced by
        the merged DataFrame.
    """
    d0 = PyposmatDataFile()
    d0.read(filename=datafile_fns[0])
    merged = d0.df

    for next_fn in datafile_fns[1:]:
        print("merging {}...".format(next_fn))
        addition = PyposmatDataFile()
        addition.read(filename=next_fn)
        stacked = pd.concat([merged, addition.df])
        merged = stacked.drop_duplicates().reset_index(drop=True)

    d0.df = merged
    return d0
def test____init____data_as_obj():
    """Constructing the plot from a data object leaves the rest unset."""
    empty_data = PyposmatDataFile()
    plot = Pyposmat2DDensityPlot(data=empty_data)

    assert isinstance(plot, Pyposmat2DDensityPlot)
    assert plot.configuration is None
    assert isinstance(plot.data, PyposmatDataFile)
    # no figure or axes yet
    for attribute in ('fig', 'ax'):
        assert getattr(plot, attribute) is None
def get_best_parameterization(config_fn,data_fn,metric_name='d_metric',o_config=None,o_data=None):
    """Return the free-parameter values of the best-scoring row.

    Args:
        config_fn (str): path of the configuration file.
        data_fn (str): path of the data file.
        metric_name (str): scoring metric; only ``'d_metric'`` is supported.
        o_config: accepted but not used by this function.
        o_data: accepted but not used by this function.

    Returns:
        OrderedDict: free parameter name -> value, taken from the first row
        of ``sub_parameter_df`` after ``subselect_by_score(..., n=1)``.

    Raises:
        PyposmatUnsupportedPotentialScoringMetric: for any other
            ``metric_name``.
    """
    _analyzer = PyposmatDataAnalyzer()
    _analyzer.read_configuration_file(filename=config_fn)
    _analyzer.read_data_file(filename=data_fn)

    # calculate the scoring metric
    # Compare by value: ``is`` tests object identity, so an equal string
    # built at runtime could be rejected.
    if metric_name == 'd_metric':
        _df = _analyzer.calculate_d_metric(df=_analyzer.datafile.df)
    else:
        s = "The metric name {} is unsupported"
        s = s.format(metric_name)
        raise PyposmatUnsupportedPotentialScoringMetric(s)

    _data = PyposmatDataFile()
    _data.read(filename=data_fn)
    _data.df = _df
    _data.subselect_by_score(score_name='d_metric',n=1)

    _free_parameter_names = _analyzer.configuration.free_parameter_names

    _parameter_best_dict = OrderedDict()
    for pn in _free_parameter_names:
        _parameter_best_dict[pn] = _data.sub_parameter_df.iloc[0][pn]

    return _parameter_best_dict
class BaseAnalysis(object):
    """Hold a configuration, a data set and an optional output directory."""

    def __init__(self, configuration, data, output_path=None):
        """
        Args:
            configuration (str or PyposmatConfigurationFile): path to an
                existing configuration file, or an already loaded object.
            data (str or PyposmatDataFile): path to an existing data file,
                or an already loaded object.
            output_path (str, optional): directory for output; see
                ``_initialize_output_path`` for what happens to an existing
                directory.
        """
        self.configuration = None
        self.data = None
        self.output_path = None

        # the configuration comes first: _initialize_data reads
        # self.configuration.qoi_targets
        self._initialize_configuration(configuration=configuration)
        self._initialize_data(data=data)
        self._initialize_output_path(path=output_path)

    def _initialize_configuration(self, configuration):
        """Set ``self.configuration`` from a path or an existing object.

        Raises:
            TypeError: if ``configuration`` is neither a str nor a
                PyposmatConfigurationFile.
        """
        if isinstance(configuration, str):
            assert os.path.isfile(configuration)
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        elif isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
        else:
            raise TypeError('configuration cannot be type:{}'.format(
                str(type(configuration))))

    def _initialize_data(self, data):
        """Set ``self.data`` and add normalized errors to it.

        A path is read into a new PyposmatDataFile; an object is deep-copied.

        Raises:
            TypeError: if ``data`` is neither a str nor a PyposmatDataFile.
        """
        if isinstance(data, str):
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        elif isinstance(data, PyposmatDataFile):
            self.data = deepcopy(data)
        else:
            raise TypeError('data cannot be type:{}'.format(str(type(data))))

        self.data.create_normalized_errors(
            normalize_type='by_qoi_target',
            qoi_targets=self.configuration.qoi_targets)

    def _initialize_output_path(self, path):
        """Set ``self.output_path`` and create the directory.

        An existing directory at ``path`` is deleted together with its
        contents and recreated empty.

        Args:
            path (str or None): None leaves ``self.output_path`` as None.

        Raises:
            TypeError: if ``path`` is neither None nor a str.
        """
        if path is None:
            self.output_path = None
        elif isinstance(path, str):
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.mkdir(path)
            self.output_path = path
        else:
            # previously a bare TypeError; now worded like the two
            # initializers above
            raise TypeError('path cannot be type:{}'.format(str(type(path))))