def write_df_logfiles(self, gbdfs_dct, dfpath):
    assert self.dfpath.exists(), self.dfpath
    self.mk_gbs_dir()

    def get_path_for_gbdf(filename):
        return self.gbs_dir.joinpath(filename)

    for filename, gbdf in gbdfs_dct.items():
        gbdf_path = get_path_for_gbdf(filename)
        # Render the grouped df as a table string, then write it to disk.
        compose_left(
            create_beautiful_table,
            partial(self.write_to_disk, name=gbdf_path),
        )(gbdf)

def entry(input_dfs, columns=util.columns, verbose=False):
    util.columns = columns
    assert isinstance(input_dfs[0], pd.DataFrame) and len(input_dfs[0]) > 1, input_dfs
    aggdf = compose_left(
        rename_data_columns,
        merge_dfs_into_one,
    )(input_dfs)
    if verbose:
        return aggdf
    return aggdf[columns]

def process_df_rows(df):
    newrows = []
    for rawrow in df[util.safecolumns].itertuples():
        # Wrap long data lines, then colorize (colors disabled here).
        row = compose_left(
            wrapdatalines,
            partial(colorize, clrs=False),
        )(rawrow)
        newrows.append([
            f"{row.Index}",
            f"{row.filepath}",
            f"({row.line_number}):",
            f"{row.call_data}",
            f"{row.snoop_data}",
        ])
    return newrows

def nc_files_to_tf_dataset(
    files: Sequence[str],
    convert: Callable[[xr.Dataset], tf.Tensor],
):
    """
    Convert a list of netCDF paths into a tensorflow dataset.

    Args:
        files: List of local or remote file paths to include in the dataset.
            Data are expected to have 2D ([sample, feature]) or 1D ([sample])
            dimensions.
        convert: Preprocessing function for going from an xr.Dataset to
            X, y tensor tuples grouped by variable.
    """
    transform = compose_left(open_netcdf_dataset, convert)
    return _seq_to_tf_dataset(files, transform)

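# A minimal usage sketch, not from the original source: the file names and the
# "air_temperature" variable below are hypothetical placeholders, and this
# `example_convert` just stacks one variable into a float32 tensor.
def example_convert(ds: xr.Dataset) -> tf.Tensor:
    return tf.convert_to_tensor(ds["air_temperature"].values, dtype=tf.float32)

example_dataset = nc_files_to_tf_dataset(["batch_0.nc", "batch_1.nc"], example_convert)
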
def entry(input_dfs, verbose=False):
    # At this point we have 3 input_dfs.
    # Rename data columns, merge into one df -> to_dict, iterate over rows.
    assert isinstance(input_dfs[0], pd.DataFrame) and len(input_dfs[0]) > 1, input_dfs
    tfdf = compose_left(
        rename_data_columns,
        merge_dfs_into_one,
    )(input_dfs)
    if verbose:
        return tfdf
    assert all(col in tfdf.columns for col in util.columns), tfdf.columns
    return tfdf[[
        "filepath",
        "line_number",
        "symbol",
        "event_kind",
        "call_data",
        "snoop_data",
    ]]

def get_pipeline(
    self,
    variables: Set[str],
):
    """
    Args:
        variables: the variables required for training, both inputs and outputs.

    Returns:
        Conversion from dataset to a dict of numpy or tensorflow tensors.
    """
    transform_funcs = []

    # xarray transforms
    if self.antarctic_only:
        transform_funcs.append(transforms.select_antarctic)

    transform_funcs.append(
        transforms.derived_dataset(
            list(variables),
            tendency_timestep_sec=self.derived_microphys_timestep,
        )
    )

    if self.use_tensors:
        transform_funcs.append(transforms.to_tensors)
    else:
        transform_funcs.append(transforms.to_ndarrays)

    # array-like dataset transforms
    transform_funcs.append(transforms.expand_single_dim_data)

    if self.vertical_subselections is not None:
        transform_funcs.append(
            transforms.maybe_subselect_feature_dim(self.vert_sel_as_slices)
        )

    return compose_left(*transform_funcs)

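# Hypothetical usage sketch: `config` stands in for an instance of the class that
# defines get_pipeline, `ds` for an xarray Dataset holding the named variables
# (the variable names here are placeholders).
pipeline = config.get_pipeline({"air_temperature", "specific_humidity"})
batch = pipeline(ds)  # dict of numpy arrays or tf tensors, per the docstring
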
def test_compose_left():
    for (compose_left_args, args, kw, expected) in generate_compose_left_test_cases():
        assert compose_left(*compose_left_args)(*args, **kw) == expected

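# A hedged sketch of the case generator the test above assumes; the concrete
# cases in the original suite may differ. Each case is
# (functions to compose, positional args, keyword args, expected result).
import operator

def generate_compose_left_test_cases():
    def inc(x):
        return x + 1

    def double(x):
        return x * 2

    return [
        ((inc,), (3,), {}, 4),
        # compose_left applies functions left to right: double(inc(3)) == 8
        ((inc, double), (3,), {}, 8),
        ((operator.add, inc), (1, 2), {}, 4),
    ]
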
def entry(df, tfdfpath):
    df = df.copy()
    writeable_string = compose_left(iterate_over_rows)(df)
    return writeable_string

def merge_dfs_into_one(dct_o_renamed_dfs):
    tfdf = compose_left(
        iterate_over_rows,
        aggregate_lst_o_merged_rowdcts,
    )(dct_o_renamed_dfs)
    return tfdf

def entry(df, dfpath):
    df = df.copy()
    # Build a boolean mask of line events, then filter the df with it.
    fltrd_df = compose_left(get_line_event_mask, lambda mask: df[mask])(df)
    return fltrd_df

def write_df_logfile(self, fltrd_df, dfpath):
    fltrd_dfpath = self.get_path_for_fltrd_df(dfpath)
    compose_left(
        create_beautiful_table,  # -> str
        partial(self.write_to_disk, name=fltrd_dfpath),
    )(fltrd_df)

def parapply(self, parts, *args, **kwargs):
    return ParApply(compose_left(*self.fns), parts, *args, **kwargs)

def entry(filename):
    df = compose_left(read_hunter_trace_file, process_lines_for_df, get_df)(filename)
    return df
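
# The snippets above rely on a left-to-right compose; toolz provides one as
# compose_left, where compose_left(f, g)(x) == g(f(x)) (the leftmost function
# runs first, the opposite of compose). A minimal illustration:
from toolz import compose_left

pipeline = compose_left(str.strip, str.upper)
assert pipeline("  hello  ") == "HELLO"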