def _apply_pi_theorem(self, df):
    """Add one product-of-powers column to ``df`` per Pi theorem result.

    Nothing happens unless both ``self.apply_pi_theorem`` and ``self.units``
    are truthy. Otherwise the units are parsed with ``_parse_units``, the
    entries for ``self.feateng_cols_`` that are not dimensionless are handed
    to ``UnitRegistry.pi_theorem``, and every returned mapping
    (column name -> exponent) is turned into a new column named
    ``"PT<i>_<formatted group>"``.

    Args:
        df: frame holding the columns named in the Pi theorem results
            (assumed to be a pandas DataFrame -- it is used via ``notna``,
            ``to_numpy`` and ``.loc``). It is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Guard clause: the feature must be switched on and units must be given.
    if not (self.apply_pi_theorem and self.units):
        return df

    registry = pint.UnitRegistry(auto_reduce_dimensions=True, autoconvert_offset_to_baseunit=True)
    all_units = _parse_units(self.units, registry, self.verbose)

    # use only original features, and of those only the ones with a dimension
    dimensional_units = {}
    for col in self.feateng_cols_:
        if not all_units[col].dimensionless:
            dimensional_units[col] = all_units[col]

    if self.verbose:
        print("[AutoFeat] Applying the Pi Theorem")

    for idx, exponents in enumerate(registry.pi_theorem(dimensional_units), 1):
        group_text = pint.formatter(exponents.items())
        if self.verbose:
            print("[AutoFeat] Pi Theorem %i: " % idx, group_text)

        # Sorted so that the multiplication order is deterministic.
        names = sorted(exponents)
        # Only use data points where none of the affected columns are NaN.
        valid_rows = df[names].notna().all(axis=1)

        # Multiply the columns raised to their exponents; the accumulation is
        # done in place on the array produced by the first power operation.
        lead = names[0]
        product = df[lead].to_numpy()[valid_rows] ** exponents[lead]
        for col in names[1:]:
            product *= df[col].to_numpy()[valid_rows] ** exponents[col]

        # Only the rows without NaNs receive a value in the new column.
        df.loc[valid_rows, "PT%i_%s" % (idx, group_text.replace(" ", ""))] = product

    return df
def to_dimensionless(quant, basis, dimensional=False):
    """Relate ``quant`` to the entries of ``basis`` through pint's Pi theorem.

    The formatted dimensionality of ``quant`` (under the label ``'_'``) and of
    every basis entry (under the label ``'<type name>.<quantity>'``) is passed
    to ``pint.pi_theorem``. From the first result the exponent of ``'_'`` is
    removed and used to normalise the remaining exponents.

    Reads the module-level names ``args`` (for ``args.verbose``) and ``ureg``.

    Args:
        quant: object exposing ``.dimensionality``; it is also the right-hand
            factor of the result when ``dimensional`` is false.
        basis: iterable of ``(system, quantity)`` pairs where
            ``system._asdict()[quantity]`` exposes ``.dimensionality``
            (presumably namedtuple-like unit systems -- confirm with callers).
        dimensional: selects which of the two results below is returned.

    Returns:
        If ``dimensional`` is true: the evaluated expression with negated,
        normalised exponents, multiplied by ``ureg('')``. Otherwise: the
        evaluated expression with normalised exponents, multiplied by
        ``quant``.
    """
    # NOTE(review): the function-level local names are kept as they are on
    # purpose. The bare eval() below resolves names against this function's
    # locals as well as the module globals, so renaming locals could change
    # what the evaluated expression sees.

    def dim(q):
        # Text form of the dimensionality of a single quantity.
        return pint.formatter(q.dimensionality.items())

    quantities = {'_': dim(quant)}
    for system, quantity in basis:
        quantities['{0}.{1}'.format(type(system).__name__, quantity)] = dim(
            system._asdict()[quantity]
        )

    # Only the first Pi group is used; '_' is the exponent of quant itself.
    powers = pint.pi_theorem(quantities)[0]
    power = powers.pop('_')

    def expression(sgn):
        # Formatted product of the basis labels with exponents scaled by
        # sgn / power.
        scaled = [(label, sgn * exponent / power) for label, exponent in powers.items()]
        return pint.formatter(scaled)

    if args.verbose:
        # The verbose line always shows the negated-exponent form.
        print('\t\t{}'.format('=' if dimensional else '*'), expression(-1))

    # The formatted string is evaluated as Python source; the labels in it are
    # expected to resolve to objects reachable from this scope.
    if dimensional:
        return eval(expression(-1)) * ureg('')
    return eval(expression(1)) * quant
def _apply_pi_theorem(self, df):
    """Add one product-of-powers column to ``df`` per Pi theorem result.

    Nothing happens when ``self.units`` is falsy. Otherwise the units are
    parsed with ``_parse_units``, the entries for ``self.feateng_cols`` that
    are not dimensionless are handed to ``UnitRegistry.pi_theorem``, and every
    returned mapping (column name -> exponent) is turned into a new column
    named ``"PT<i>: <formatted group>"``. Progress is always printed.

    Args:
        df: frame holding the columns named in the Pi theorem results
            (assumed to be a pandas DataFrame -- columns are read via
            ``.values``). It is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Guard clause: without units there is nothing to derive.
    if not self.units:
        return df

    registry = pint.UnitRegistry(auto_reduce_dimensions=True, autoconvert_offset_to_baseunit=True)
    all_units = _parse_units(self.units, registry)

    # use only original features, and of those only the ones with a dimension
    dimensional_units = {}
    for col in self.feateng_cols:
        if not all_units[col].dimensionless:
            dimensional_units[col] = all_units[col]

    print("[AutoFeatRegression] Applying the Pi Theorem")

    for idx, exponents in enumerate(registry.pi_theorem(dimensional_units), 1):
        group_text = pint.formatter(exponents.items())
        print("Pi Theorem %i: " % idx, group_text)

        # Sorted so that the multiplication order is deterministic.
        names = sorted(exponents)

        # Multiply the columns raised to their exponents; the accumulation is
        # done in place on the array produced by the first power operation.
        lead = names[0]
        product = df[lead].values ** exponents[lead]
        for col in names[1:]:
            product *= df[col].values ** exponents[col]

        df["PT%i: %s" % (idx, group_text.replace(" ", ""))] = product

    return df