def hash_columns(ds, hashed_columns):
    """Replace the values of selected columns in ``ds`` with their hashes, in place.

    Every row in ``ds.rows`` is visited.  For each index in ``hashed_columns``
    the cell ``row.values[col]`` is replaced by
    ``Value(iVal=murmurhash2(cell))``, unless the cell's ``getType()`` is
    ``Value.NVAL`` or ``Value.__EMPTY__``, in which case it is left untouched.

    Args:
        ds: Object exposing ``column_names`` (sized) and ``rows`` (an iterable
            of objects whose ``values`` attribute is indexable and mutable).
        hashed_columns: Sized, re-iterable collection of column indices.

    Returns:
        The same ``ds`` object that was passed in (mutated, not copied).

    Raises:
        AssertionError: If any index is not below ``len(ds.column_names)``.
            Only the upper bound is checked, so negative indices are not
            rejected here.
    """
    if len(hashed_columns) == 0:
        return ds

    # Validate everything up front so ``ds`` is never left half-hashed.
    # The error is raised explicitly instead of via ``assert`` because assert
    # statements are removed under ``python -O``; AssertionError is kept so
    # callers that already catch it keep working.
    column_count = len(ds.column_names)
    out_of_range = [col for col in hashed_columns if not col < column_count]
    if out_of_range:
        raise AssertionError(
            "hashed column index(es) {!r} out of range for {} column(s)".format(
                out_of_range, column_count
            )
        )

    for row in ds.rows:
        for col in hashed_columns:
            val = row.values[col]
            # The excluded types are looked up lazily, per cell, exactly as the
            # original did; such cells keep their current value.
            if val.getType() not in (Value.NVAL, Value.__EMPTY__):
                row.values[col] = Value(iVal=murmurhash2(val))
    return ds
def hash_columns(ds, hashed_columns):
    """Hash the cells of the given columns of ``ds`` in place and return ``ds``.

    For every row in ``ds.rows`` and every index in ``hashed_columns``, the
    cell ``row.values[col]`` is overwritten with
    ``Value(iVal=murmurhash2(cell))``.  Cells whose ``getType()`` equals
    ``Value.NVAL`` or ``Value.__EMPTY__`` are skipped and keep their value.

    Args:
        ds: Object exposing ``column_names`` (sized) and ``rows`` (an iterable
            of objects whose ``values`` attribute is indexable and mutable).
        hashed_columns: Sized, re-iterable collection of column indices.

    Returns:
        The ``ds`` argument itself; no copy is made.

    Raises:
        AssertionError: If an index is not below ``len(ds.column_names)``.
            Negative indices pass this check, since only the upper bound is
            tested.
    """
    if len(hashed_columns) == 0:
        return ds

    # Explicit raise rather than ``assert``: assert statements disappear under
    # ``python -O``, which would let a bad index fail later with an IndexError
    # after some rows had already been rewritten.  The exception type and
    # message are the ones the assert produced.
    column_count = len(ds.column_names)
    for col in hashed_columns:
        if not col < column_count:
            raise AssertionError("The hashed column should in range.")

    for row in ds.rows:
        for col in hashed_columns:
            cell = row.values[col]
            cell_type = cell.getType()  # fetched once instead of twice per cell
            if cell_type != Value.NVAL and cell_type != Value.__EMPTY__:
                row.values[col] = Value(iVal=murmurhash2(cell))
    return ds