def featurize_many(self, entries, ignore_errors=False, return_errors=False,
                   pbar=True):
    """Featurize a list of entries.

    If `featurize` takes multiple inputs, supply inputs as a list of tuples.

    Featurize_many supports entries as a list, tuple, numpy array,
    Pandas Series, or Pandas DataFrame.

    Args:
        entries (list-like object): A list of entries to be featurized.
        ignore_errors (bool): Returns NaN for entries where exceptions are
            thrown if True. If False, exceptions are thrown as normal.
        return_errors (bool): If True, returns the feature list as
            determined by ignore_errors with traceback strings added
            as an extra 'feature'. Entries which featurize without
            exceptions have this extra feature set to NaN.
        pbar (bool): Show a progress bar for featurization if True.

    Returns:
        (list) features for each entry.

    Raises:
        ValueError: If return_errors is set without ignore_errors.
        Exception: If entries is not one of the supported container types.
    """
    if return_errors and not ignore_errors:
        raise ValueError("Please set ignore_errors to True to use"
                         " return_errors.")

    # Check inputs
    if not isinstance(entries,
                      (tuple, list, np.ndarray, pd.Series, pd.DataFrame)):
        raise Exception("'entries' must be a list-like object")

    # Special case: empty input. Compare by value: an identity test against
    # an int literal relies on interpreter caching and triggers a
    # SyntaxWarning on Python 3.8+.
    if len(entries) == 0:
        return []

    # If the featurize function only has a single arg, zip the inputs so
    # that every entry is handed over as a tuple of arguments.
    if isinstance(entries, pd.DataFrame):
        entries = entries.values
    elif isinstance(entries, pd.Series) \
            or not isinstance(entries[0], (tuple, list, np.ndarray)):
        entries = zip(entries)

    # Add a progress bar
    if pbar:
        if is_notebook():
            tqdm_func = tqdm.tqdm_notebook
        else:
            tqdm_func = tqdm.tqdm
        # list() required, tqdm has issues with memory if generator given
        entries = tqdm_func(list(entries), desc=self.__class__.__name__)

    # Bind the keyword options once; used by both serial and parallel paths.
    featurize_one = partial(self.featurize_wrapper,
                            return_errors=return_errors,
                            ignore_errors=ignore_errors)

    # Run the actual featurization
    if self.n_jobs == 1:
        return [featurize_one(x) for x in entries]

    if sys.version_info[0] < 3:
        warnings.warn("Multiprocessing is not supported in "
                      "matminer for Python 2.x. Multiprocessing has "
                      "been disabled. Please upgrade to Python 3.x to "
                      "enable multiprocessing.")
        self.set_n_jobs(1)
        # Featurize serially right here. Re-entering featurize_many with
        # the already prepared entries would fail the input type check when
        # they are wrapped in a progress bar (the wrapper is not one of the
        # accepted container types).
        return [featurize_one(x) for x in entries]

    with Pool(self.n_jobs) as p:
        return p.map(featurize_one, entries, chunksize=self.chunksize)
def featurize_many(self, entries, ignore_errors=False, return_errors=False,
                   pbar=True):
    """Featurize a list of entries.

    If `featurize` takes multiple inputs, supply inputs as a list of tuples.

    Featurize_many supports entries as a list, tuple, numpy array,
    Pandas Series, or Pandas DataFrame.

    Args:
        entries (list-like object): A list of entries to be featurized.
        ignore_errors (bool): Returns NaN for entries where exceptions are
            thrown if True. If False, exceptions are thrown as normal.
        return_errors (bool): If True, returns the feature list as
            determined by ignore_errors with traceback strings added
            as an extra 'feature'. Entries which featurize without
            exceptions have this extra feature set to NaN.
        pbar (bool): Show a progress bar for featurization if True.

    Returns:
        (list) features for each entry.

    Raises:
        ValueError: If return_errors is set without ignore_errors.
        Exception: If entries is not one of the supported container types.
    """
    if return_errors and not ignore_errors:
        raise ValueError("Please set ignore_errors to True to use"
                         " return_errors.")

    # Check inputs
    supported_containers = (tuple, list, np.ndarray, pd.Series,
                            pd.DataFrame)
    if not isinstance(entries, supported_containers):
        raise Exception("'entries' must be a list-like object")

    # Special case: empty input. Equality (not identity) is the correct
    # comparison for integers; `is 0` emits a SyntaxWarning on Python 3.8+.
    if len(entries) == 0:
        return []

    # If the featurize function only has a single arg, zip the inputs
    if isinstance(entries, pd.DataFrame):
        entries = entries.values
    elif isinstance(entries, pd.Series) \
            or not isinstance(entries[0], (tuple, list, np.ndarray)):
        entries = zip(entries)

    # Add a progress bar
    if pbar:
        tqdm_func = tqdm.tqdm_notebook if is_notebook() else tqdm.tqdm
        # list() required, tqdm has issues with memory if generator given
        entries = tqdm_func(list(entries), desc=self.__class__.__name__)

    # Decide between the serial and the multiprocessing code path.
    use_pool = self.n_jobs != 1
    if use_pool and sys.version_info[0] < 3:
        warnings.warn("Multiprocessing is not supported in "
                      "matminer for Python 2.x. Multiprocessing has "
                      "been disabled. Please upgrade to Python 3.x to "
                      "enable multiprocessing.")
        self.set_n_jobs(1)
        # Fall through to the serial loop below instead of calling
        # featurize_many again: by now the entries may be wrapped in a
        # progress bar, which the input type check above does not accept.
        use_pool = False

    # Run the actual featurization
    if not use_pool:
        return [self.featurize_wrapper(x, ignore_errors=ignore_errors,
                                       return_errors=return_errors)
                for x in entries]

    with Pool(self.n_jobs) as pool:
        func = partial(self.featurize_wrapper,
                       return_errors=return_errors,
                       ignore_errors=ignore_errors)
        return pool.map(func, entries, chunksize=self.chunksize)