def from_series(self, src_series, dst_series=None, store_transposed=False):
    """
    Populate the numbering map with vertices from the specified
    pair of series objects, one for the source and one for
    the destination.

    Parameters
    ----------
    src_series: cudf.Series or dask_cudf.Series
        Contains a list of external vertex identifiers that will be
        numbered by the NumberMap class.
    dst_series: cudf.Series or dask_cudf.Series, optional
        Contains a list of external vertex identifiers that will be
        numbered by the NumberMap class.  When given, it must be of
        exactly the same type as src_series.
    store_transposed : bool
        Identify how the graph adjacency will be used.
        If True, the graph will be organized by destination.
        If False, the graph will be organized by source

    Raises
    ------
    Exception
        If the map is already populated, if src_series and dst_series
        are of different types, or if src_series is neither a
        cudf.Series nor a dask_cudf.Series.
    """
    if self.implementation is not None:
        raise Exception("NumberMap is already populated")

    if dst_series is not None and type(src_series) is not type(dst_series):
        raise Exception("src_series and dst_series must have same type")

    # Exact type checks are kept on purpose: subclasses were never accepted.
    if type(src_series) is cudf.Series:
        backend = NumberMap.SingleGPU
        frame = cudf.DataFrame()
        frame["s"] = src_series
    elif type(src_series) is dask_cudf.Series:
        backend = NumberMap.MultiGPU
        # A dask collection cannot be created empty and filled column by
        # column, so the frame is derived from the source series itself.
        frame = src_series.to_frame(name="s")
    else:
        raise Exception("src_series must be cudf.Series or "
                        "dask_cudf.Series")

    dst_col_names = None
    if dst_series is not None:
        frame["d"] = dst_series
        dst_col_names = ["d"]

    implementation = backend(
        frame, ["s"], dst_col_names, self.id_type, store_transposed
    )
    implementation.compute()

    # Publish the implementation only once compute() has succeeded, so a
    # failed attempt does not leave the map reporting "already populated".
    self.implementation = implementation
def to_internal_vertex_id(self, df, col_names=None):
    """
    Given a collection of external vertex ids, return the internal
    vertex ids

    Parameters
    ----------
    df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series
        Contains a list of external vertex identifiers that will be
        converted into internal vertex identifiers
    col_names: (optional) list of strings
        This list of 1 or more strings contain the names
        of the columns that uniquely identify an external
        vertex identifier.  Ignored when df is a Series: the series is
        wrapped in a single-column frame whose column is named "0".

    Returns
    ---------
    vertex_ids : cudf.Series or dask_cudf.Series
        The vertex identifiers.  Note that to_internal_vertex_id
        does not guarantee order or partitioning (in the case of
        dask_cudf) of vertex ids. If order matters use
        add_internal_vertex_id
    """
    # The implementation is always handed a frame plus column names, so a
    # Series is wrapped in a one-column frame named "0".
    if type(df) is cudf.Series:
        lookup_df = cudf.DataFrame()
        lookup_df["0"] = df
        lookup_cols = ["0"]
    elif type(df) is dask_cudf.Series:
        # A dask collection cannot be created empty and filled column by
        # column, so the frame is derived from the series itself.
        lookup_df = df.to_frame(name="0")
        lookup_cols = ["0"]
    else:
        lookup_df = df
        lookup_cols = col_names

    reply = self.implementation.to_internal_vertex_id(lookup_df, lookup_cols)

    # NOTE(review): the temporary column "0" is only created for Series
    # input, yet reply["0"] is selected for DataFrame input.  This looks
    # inverted or stale; confirm against what the implementation returns
    # before changing it.  Behavior is left as it was.
    if type(df) in [cudf.DataFrame, dask_cudf.DataFrame]:
        return reply["0"]
    return reply