def data(self):
    """
    Get Selection data as a pandas.DataFrame

    The frame is loaded on first access and cached in ``self._data``; later
    calls return the cached object unchanged. The source depends on
    ``self.path``:

    - "MASTER", "ALL" or "CALCULATED": the module-level ``query`` is asked
      for the 'configname' and 'selected' columns.
    - a JSON selection (``self._is_json()`` is true): read with
      ``pandas.read_json`` using ``orient='records'``.
    - anything else: a text file with columns separated by one or more
      spaces, whose header line may start with '#'.

    After loading, ``self._clean_data()`` is called and, unless ``self.all``
    is true, only rows whose 'selected' value equals True are kept.

    If the data is modified, 'save' must be called for CASM to use the
    modified selection.

    Returns
    -------
    pandas.DataFrame
        The cached selection data.
    """
    if self._data is not None:
        return self._data

    if self.path in ["MASTER", "ALL", "CALCULATED"]:
        self._data = query(self.proj, ['configname', 'selected'], self,
                           all=self.all)
    elif self._is_json():
        # read_json takes no file-mode argument: a positional 'r' would be
        # bound to `orient` and clash with the keyword below.
        self._data = pandas.read_json(self.path, orient='records')
    else:
        with open(self.path, compat.pandas_rmode()) as f:
            # Drop the leading '#' so the header line parses as column names.
            if compat.peek(f) == '#':
                f.read(1)
            self._data = pandas.read_csv(f, sep=compat.str(' +'),
                                         engine='python')

    self._clean_data()

    if not self.all:
        # `== True` is an elementwise comparison producing a boolean mask.
        # Copy the result so later in-place column writes (see `query`)
        # operate on an independent frame instead of a flagged slice.
        self._data = self._data[self._data['selected'] == True].copy()

    return self._data
def query(self, columns, force=False, verbose=False):
    """
    Query requested columns and store them in 'data'.

    Will not overwrite columns that already exist, unless `force` is true.
    Will query data for all configurations, whether selected or not, if
    `self.all == True`.

    The actual lookup is delegated to the module-level `query`, called with
    `self.proj`, the columns to fetch, `self.path`, `self.type`,
    `verbatim=True` and `all=self.all`. Each returned column is written into
    `self.data` by position (the raw values are assigned, not aligned on
    index).

    Arguments
    ---------

      columns: List(str)
        Data requested, will be added as columns in `self.data`. This
        corresponds to the `-k` option of `casm query`. A list of options can
        be obtained from `casm query --help properties`.

      force: bool
        If false, input `columns` that already exist in `self.data.columns`
        will be ignored and those columns will not be updated. If true,
        those columns will be overwritten with new data.

      verbose: bool
        How much to print to stdout.

    Returns
    -------
      None. Returns early, without querying, when no column needs fetching.

    Raises
    ------
      AssertionError
        If the query result does not have the same number of rows as
        `self.data`.
    """
    # Truthiness test, so any falsy `force` (False, None, 0) means
    # "keep existing columns".
    if force:
        _col = columns
    else:
        _col = [x for x in columns if x not in self.data.columns]

    if verbose:
        existing = [x for x in columns if x in self.data.columns]
        print("# Query requested:", columns)
        if force:
            print("# Overwrite existing:", existing)
        else:
            print("# Use existing:", existing)
        if len(_col) == 0:
            print("# No query necessary")
        else:
            print("# Querying:", _col)

    if len(_col) == 0:
        return

    df = query(self.proj, _col, self.path, self.type, verbatim=True,
               all=self.all)

    if verbose:
        print("# DONE\n")

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged.
    if self.data.shape[0] != df.shape[0]:
        raise AssertionError(
            "querying different numbers of records: {0}, {1}".format(
                self.data.shape, df.shape))

    for c in df.columns:
        self.data.loc[:, c] = df.loc[:, c].values
def query(self, columns, force=False, verbose=False):
    """
    Query requested columns and store them in 'data'.

    Will not overwrite columns that already exist, unless `force` is true.
    Will query data for all configurations, whether selected or not, if
    self.all == True.

    The actual lookup is delegated to the module-level `query`, called with
    `self.proj`, the columns to fetch, this object and `all=self.all`. Each
    returned column is written into `self.data` by position (the raw values
    are assigned, not aligned on index).

    Arguments
    ---------

      columns: list of column names to fetch into `self.data`.

      force: if true, columns already present in `self.data` are queried
        again and overwritten; otherwise they are skipped.

      verbose: if true, progress messages are printed to stdout.

    Returns
    -------
      None. Returns early, without querying, when no column needs fetching.

    Raises
    ------
      AssertionError
        If the query result does not have the same number of rows as
        `self.data`.
    """
    # Truthiness test, so any falsy `force` (False, None, 0) means
    # "keep existing columns".
    if force:
        _col = columns
    else:
        _col = [x for x in columns if x not in self.data.columns]

    if verbose:
        # Every message is one pre-formatted string, so the output is the
        # same whether `print` is the Python 2 statement or the Python 3
        # function.
        existing = [x for x in columns if x in self.data.columns]
        print("# Query requested: {0}".format(columns))
        if force:
            print("# Overwrite existing: {0}".format(existing))
        else:
            print("# Use existing: {0}".format(existing))
        if len(_col) == 0:
            print("# No query necessary")
        else:
            print("# Querying: {0}".format(_col))

    if len(_col) == 0:
        return

    df = query(self.proj, _col, self, all=self.all)

    if verbose:
        print("# DONE\n")

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged.
    if self.data.shape[0] != df.shape[0]:
        raise AssertionError(
            "querying different numbers of records: {0}, {1}".format(
                self.data.shape, df.shape))

    for c in df.columns:
        self.data.loc[:, c] = df.loc[:, c].values
def data(self):
    """
    Get Selection data as a pandas.DataFrame

    The frame is loaded on first access and cached in ``self._data``; later
    calls return the cached object unchanged. The source depends on
    ``self.path``:

    - "MASTER", "ALL" or "CALCULATED": the module-level ``query`` is asked
      for the 'configname' and 'selected' columns.
    - a JSON selection (``self._is_json()`` is true): read with
      ``pandas.read_json`` using ``orient='records'``.
    - anything else: a text file with columns separated by one or more
      spaces, whose header line may start with '#'.

    After loading, ``self._clean_data()`` is called and, unless ``self.all``
    is true, only rows whose 'selected' value equals True are kept.

    If the data is modified, 'save' must be called for CASM to use the
    modified selection.

    Returns
    -------
    pandas.DataFrame
        The cached selection data.
    """
    if self._data is not None:
        return self._data

    if self.path in ["MASTER", "ALL", "CALCULATED"]:
        self._data = query(self.proj, ['configname', 'selected'], self)
    elif self._is_json():
        self._data = pandas.read_json(self.path, orient='records')
    else:
        # Context manager so the handle is closed even if parsing fails.
        with open(self.path, 'r') as f:
            # Drop the leading '#' of the header line; if the file does not
            # start with one, rewind so the first column name stays intact.
            if f.read(1) != '#':
                f.seek(0)
            # ' +' (one or more spaces): a pattern such as ' *' can match the
            # empty string, which makes the regex split break fields apart
            # between every character on Python 3.7+.
            self._data = pandas.read_csv(f, sep=' +', engine='python')

    self._clean_data()

    if not self.all:
        # `== True` is an elementwise comparison producing a boolean mask.
        # Copy the result so later in-place column writes operate on an
        # independent frame instead of a flagged slice.
        self._data = self._data[self._data['selected'] == True].copy()

    return self._data
def query(self, columns, force=False, verbose=False):
    """
    Query requested columns and store them in 'data'.

    Will not overwrite columns that already exist, unless `force` is true.
    Will query data for all configurations, whether selected or not, if
    self.all == True.

    The actual lookup is delegated to the module-level `query`, called with
    `self.proj`, the columns to fetch, this object and `all=self.all`. Each
    returned column is written into `self.data` by position (the raw values
    are assigned, not aligned on index).

    Arguments
    ---------

      columns: list of column names to fetch into `self.data`.

      force: if true, columns already present in `self.data` are queried
        again and overwritten; otherwise they are skipped.

      verbose: if true, progress messages are printed to stdout.

    Returns
    -------
      None. Returns early, without querying, when no column needs fetching.

    Raises
    ------
      AssertionError
        If the query result does not have the same number of rows as
        `self.data`.
    """
    # Truthiness test, so any falsy `force` (False, None, 0) means
    # "keep existing columns".
    if force:
        _col = columns
    else:
        _col = [x for x in columns if x not in self.data.columns]

    if verbose:
        # Every message is one pre-formatted string, so the output is the
        # same whether `print` is the Python 2 statement or the Python 3
        # function.
        already_there = [x for x in columns if x in self.data.columns]
        print("# Query requested: {0}".format(columns))
        if force:
            print("# Overwrite existing: {0}".format(already_there))
        else:
            print("# Use existing: {0}".format(already_there))
        if len(_col) == 0:
            print("# No query necessary")
        else:
            print("# Querying: {0}".format(_col))

    if len(_col) == 0:
        return

    df = query(self.proj, _col, self, all=self.all)

    if verbose:
        print("# DONE\n")

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged.
    if self.data.shape[0] != df.shape[0]:
        raise AssertionError(
            "querying different numbers of records: {0}, {1}".format(
                self.data.shape, df.shape))

    for c in df.columns:
        self.data.loc[:, c] = df.loc[:, c].values