from io import StringIO

import datashape
from datashape import Option, string
from datashape.predicates import isdatelike
import numpy as np
import pandas as pd

# ``getbind``, ``batch``, ``CSV``, and ``append`` are helpers defined
# elsewhere in this package.


def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
    bind = getbind(el, bind)
    if bind.dialect.name == 'postgresql':
        buf = StringIO()
        append(CSV(None, buffer=buf), el, bind=bind, **kwargs)
        buf.seek(0)
        datetime_fields = []
        other_dtypes = {}
        optional_string_fields = []
        try:
            fields = dshape.measure.fields
        except AttributeError:
            fields = [(0, dshape.measure)]
        for n, (field, dtype) in enumerate(fields):
            if isdatelike(dtype):
                datetime_fields.append(field)
            elif isinstance(dtype, Option):
                ty = dtype.ty
                if ty in datashape.integral:
                    other_dtypes[field] = 'float64'
                else:
                    other_dtypes[field] = ty.to_numpy_dtype()
                if ty == string:
                    # work with integer column indices for the
                    # optional_string columns because we don't always
                    # know the column name and then the lookup will fail
                    # in the loop below.
                    optional_string_fields.append(n)
            else:
                other_dtypes[field] = dtype.to_numpy_dtype()

        df = pd.read_csv(
            buf,
            parse_dates=datetime_fields,
            dtype=other_dtypes,
            skip_blank_lines=False,
            escapechar=kwargs.get('escapechar', '\\'),
        )
        # read_csv really wants missing values to be NaN, but for
        # string (object) columns, we want None to be missing
        columns = df.columns
        for field_ix in optional_string_fields:
            # use ``df.loc[bool, df.columns[field_ix]]`` because you cannot
            # do boolean slicing with ``df.iloc``.
            field = columns[field_ix]
            df.loc[df[field].isnull(), field] = None
        return df

    columns, rows = batch(el, bind=bind)
    dtypes = {}
    try:
        fields = dshape.measure.fields
    except AttributeError:
        fields = [(columns[0], dshape.measure)]
    for field, dtype in fields:
        if isinstance(dtype, Option):
            ty = dtype.ty
            if ty in datashape.integral:
                dtypes[field] = 'float64'
            else:
                dtypes[field] = ty.to_numpy_dtype()
        else:
            dtypes[field] = dtype.to_numpy_dtype()
    return pd.DataFrame(np.array(list(map(tuple, rows)),
                                 dtype=[(str(c), dtypes[c]) for c in columns]))
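
# Illustrative sketch of the two dtype quirks the function above works
# around, using only pandas/NumPy; the CSV content and column names here
# are invented for the example.
def _demo_missing_value_handling():
    # Nullable integers must be read as float64: NumPy integer dtypes have
    # no missing-value representation, so the empty field becomes NaN.
    buf = StringIO("id,label\n1,a\n,\n3,c\n")
    df = pd.read_csv(buf, dtype={'id': 'float64', 'label': object},
                     skip_blank_lines=False)

    # read_csv marks missing object entries as NaN; rewrite them as None,
    # mirroring the optional-string fixup above.
    df.loc[df['label'].isnull(), 'label'] = None
    print(df['label'].tolist())  # ['a', None, 'c']

    # The non-PostgreSQL fallback builds the frame from a NumPy structured
    # array instead of going through CSV:
    arr = np.array([(1, 'a'), (3, 'c')],
                   dtype=[('id', 'int64'), ('label', object)])
    print(pd.DataFrame(arr))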
""" Vector norm See np.linalg.norm """ if ord is None or ord == 'fro': ord = 2 if ord == inf: return max(abs(expr), axis=axis, keepdims=keepdims) elif ord == -inf: return min(abs(expr), axis=axis, keepdims=keepdims) elif ord == 1: return sum(abs(expr), axis=axis, keepdims=keepdims) elif ord % 2 == 0: return sum(expr**ord, axis=axis, keepdims=keepdims)**(1.0 / ord) return sum(abs(expr)**ord, axis=axis, keepdims=keepdims)**(1.0 / ord) dshape_method_list.extend([ (iscollection, set([count, nelements])), (lambda ds: (iscollection(ds) and (isstring(ds) or isnumeric(ds) or isboolean(ds) or isdatelike(ds))), set([min, max])), (lambda ds: len(ds.shape) == 1, set([nrows, nunique])), (lambda ds: iscollection(ds) and isboolean(ds), set([any, all])), (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)), set([mean, sum, std, var, vnorm])), ]) method_properties.update([nrows])
See np.linalg.norm """ if ord is None or ord == 'fro': ord = 2 if ord == inf: return max(abs(expr), axis=axis, keepdims=keepdims) elif ord == -inf: return min(abs(expr), axis=axis, keepdims=keepdims) elif ord == 1: return sum(abs(expr), axis=axis, keepdims=keepdims) elif ord % 2 == 0: return sum(expr ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord) return sum(abs(expr) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord) dshape_method_list.extend([ (iscollection, set([count, nelements])), (lambda ds: (iscollection(ds) and (isstring(ds) or isnumeric(ds) or isboolean(ds) or isdatelike(ds) or isinstance(ds, TimeDelta))), set([min, max])), (lambda ds: len(ds.shape) == 1, set([nrows, nunique])), (lambda ds: iscollection(ds) and isboolean(ds), set([any, all])), (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)), set([mean, sum, std, var, vnorm])), ]) method_properties.update([nrows])
See np.linalg.norm """ if ord is None or ord == 'fro': ord = 2 if ord == inf: return max(abs(expr), axis=axis, keepdims=keepdims) elif ord == -inf: return min(abs(expr), axis=axis, keepdims=keepdims) elif ord == 1: return sum(abs(expr), axis=axis, keepdims=keepdims) elif ord % 2 == 0: return sum(expr ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord) return sum(abs(expr) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord) dshape_method_list.extend([ (iscollection, set([count, nelements])), (lambda ds: (iscollection(ds) and (isstring(ds) or isnumeric(ds) or isboolean(ds) or isdatelike(ds))), set([min, max])), (lambda ds: len(ds.shape) == 1, set([nrows, nunique])), (lambda ds: iscollection(ds) and isboolean(ds), set([any, all])), (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)), set([mean, sum, std, var, vnorm])), ]) method_properties.update([nrows])
""" Vector norm See np.linalg.norm """ if ord is None or ord == 'fro': ord = 2 if ord == inf: return max(abs(expr), axis=axis, keepdims=keepdims) elif ord == -inf: return min(abs(expr), axis=axis, keepdims=keepdims) elif ord == 1: return sum(abs(expr), axis=axis, keepdims=keepdims) elif ord % 2 == 0: return sum(expr**ord, axis=axis, keepdims=keepdims)**(1.0 / ord) return sum(abs(expr)**ord, axis=axis, keepdims=keepdims)**(1.0 / ord) dshape_method_list.extend([ (iscollection, set([count, nelements])), (lambda ds: (iscollection(ds) and (isstring(ds) or isnumeric(ds) or isboolean( ds) or isdatelike(ds) or isinstance(ds, TimeDelta))), set([min, max])), (lambda ds: len(ds.shape) == 1, set([nrows, nunique])), (lambda ds: iscollection(ds) and isboolean(ds), set([any, all])), (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)), set([mean, sum, std, var, vnorm])), ]) method_properties.update([nrows])