def from_arrays(**arrays):
    """Create an in memory dataset from numpy arrays

    Values that are not already numpy arrays and whose first element is a
    string are converted to a bytes (``'S'``) array, falling back to a unicode
    (``'U'``) array when that conversion raises ``UnicodeEncodeError``.  All
    other values are passed through ``np.asanyarray``.

    :param: arrays: keyword arguments with arrays (column name -> values)
    :return: the ``vaex.dataset.DatasetArrays`` instance the columns were added to

    :Example:

    >>> x = np.arange(10)
    >>> y = x ** 2
    >>> dataset = vx.from_arrays(x=x, y=y)
    """
    import numpy as np
    import six

    dataset = vaex.dataset.DatasetArrays("array")
    for name, array in arrays.items():
        holds_strings = False
        if not isinstance(array, np.ndarray):
            try:
                holds_strings = isinstance(array[0], six.string_types)
            except IndexError:
                # Empty sequence: there is no first element to inspect, so
                # let np.asanyarray build the (empty) column instead of failing.
                holds_strings = False
        if holds_strings:
            try:
                array = np.array(array, dtype='S')
            except UnicodeEncodeError:
                # Text that cannot be stored as bytes is kept as unicode.
                array = np.array(array, dtype='U')
        else:
            array = np.asanyarray(array)
        dataset.add_column(name, array)
    return dataset
def from_pandas(df, name="pandas"):
    """Create an in memory dataset from a pandas dataframe

    Every column's ``.values`` is added to the dataset.  Columns whose first
    value is a string are converted with ``astype("S")`` first.  If adding a
    column fails, a message is printed and the column is retried after
    converting it with ``astype("S")``; if that fails as well, a second message
    is printed and the column is left out.

    :param: pandas.DataFrame df: Pandas dataframe
    :param: name: unique for the dataset
    :return: the ``vaex.dataset.DatasetArrays`` instance holding the columns that could be added

    :Example:

    >>> import pandas as pd
    >>> df = pd.read_csv("test.csv")
    >>> ds = vx.from_pandas(df, name="test")
    """
    import six

    dataset = vaex.dataset.DatasetArrays(name)
    # A distinct loop variable keeps the ``name`` parameter from being shadowed.
    for column_name in df.columns:
        values = df[column_name].values
        # Only peek at the first element when there is one; an empty frame
        # would otherwise raise IndexError here.
        if len(values) > 0 and isinstance(values[0], six.string_types):
            values = values.astype("S")
        try:
            dataset.add_column(column_name, values)
        except Exception as e:
            print("could not convert column %s, error: %r, will try to convert it to string" % (column_name, e))
            try:
                values = values.astype("S")
                dataset.add_column(column_name, values)
            except Exception as retry_error:
                # Best effort: report the column and carry on with the others.
                print("Giving up column %s, error: %r" % (column_name, retry_error))
    return dataset
def add(name, column):
    """Add one column to ``dataset``, retrying as a string column on failure.

    ``column.values`` is added under ``name``.  When the first value is a
    string the values are converted with ``astype("S")`` beforehand.  If
    ``dataset.add_column`` raises, a message is printed and the column is
    retried after ``astype("S")``; a second failure is reported and the column
    is skipped.

    NOTE(review): ``dataset`` and ``six`` are not defined in this function;
    they are presumably provided by the enclosing scope -- confirm.

    :param: name: column name passed to ``dataset.add_column``
    :param: column: object exposing a ``.values`` array
    """
    values = column.values
    # Only inspect the first element when one exists (empty columns would
    # otherwise raise IndexError).
    if len(values) > 0 and isinstance(values[0], six.string_types):
        values = values.astype("S")
    try:
        dataset.add_column(name, values)
    except Exception as e:
        print("could not convert column %s, error: %r, will try to convert it to string" % (name, e))
        try:
            values = values.astype("S")
            dataset.add_column(name, values)
        except Exception as retry_error:
            # Best effort: report the column instead of propagating the error.
            print("Giving up column %s, error: %r" % (name, retry_error))
def from_items(*items):
    """Build an in memory dataset from ``(name, array)`` pairs.

    Unlike ``from_arrays``, the columns are added in exactly the order the
    pairs are given.

    :param: items: list of [(name, numpy array), ...]
    :return: the ``vaex.dataset.DatasetArrays`` instance the columns were added to

    :Example:

    >>> x = np.arange(10)
    >>> y = x ** 2
    >>> dataset = vx.from_items(('x', x), ('y', y))
    """
    result = vaex.dataset.DatasetArrays("array")
    add_column = result.add_column
    for column_name, column_data in items:
        add_column(column_name, column_data)
    return result
def from_arrays(**arrays):
    """Build an in memory dataset from arrays given as keyword arguments.

    Each keyword becomes a column name and its value is added unchanged as
    that column.

    :param: arrays: keyword arguments with arrays
    :return: the ``vaex.dataset.DatasetArrays`` instance the columns were added to

    :Example:

    >>> x = np.arange(10)
    >>> y = x ** 2
    >>> dataset = vx.from_arrays(x=x, y=y)
    """
    result = vaex.dataset.DatasetArrays("array")
    for column_name in arrays:
        result.add_column(column_name, arrays[column_name])
    return result