def is_column_a(df, column=None, dtypes="str"):
    """
    Tell whether a single column's data type is one of the requested types.

    :param df: spark or dask dataframe; must expose ``cols.schema_dtype``
    :param column: the one column to inspect (a name or a one-item list);
        passing more than one column triggers ``RaiseIt.length_error``
    :param dtypes: type or types to test against, resolved through
        ``parse_dtypes(df, dtypes)``
    :return: result of the type check for Spark/Dask dataframes, ``None``
        when the dataframe is recognised as neither
    """
    columns = val_to_list(column)
    if len(columns) > 1:
        RaiseIt.length_error(columns, 1)

    accepted_types = tuple(val_to_list(parse_dtypes(df, dtypes)))
    column_name = one_list_to_val(columns)
    column_dtype = df.cols.schema_dtype(column_name)

    # NOTE(review): the Spark check inspects ``df.data`` while the Dask check
    # inspects ``df`` itself -- confirm this asymmetry is intended.
    if is_spark_dataframe(df.data):
        # Spark branch: the schema dtype is tested as an instance of the types.
        return isinstance(column_dtype, accepted_types)
    if is_dask_dataframe(df):
        # Dask branch: the schema dtype is tested by membership instead.
        return column_dtype in accepted_types
    return None
def get_output_cols(input_cols, output_cols):
    """
    Build the output column names that correspond to the input columns.

    :param input_cols: input column name or list of input column names
    :param output_cols: output column name, list of names, or ``None``
    :return: the resolved output column names
    """
    input_is_list = is_list(input_cols)

    if input_is_list and is_list(output_cols):
        # Both are lists: they must pair up one to one.
        if len(input_cols) != len(output_cols):
            RaiseIt.length_error(input_cols, output_cols)
        return output_cols

    if input_is_list and is_str(output_cols):
        if len(input_cols) > 1:
            # Several inputs with one string: the string is appended to
            # every input name.
            return [name + output_cols for name in input_cols]
        return val_to_list(output_cols)

    if is_str(input_cols) and is_str(output_cols):
        return val_to_list(output_cols)

    if output_cols is None:
        # No output names given: reuse the input names.
        return input_cols

    return output_cols
def is_column_a(df, column, dtypes):
    """
    Tell whether a single column's schema data type matches the given types.

    :param df: dataframe whose ``schema`` can be indexed by column name
    :param column: the one column to inspect (a name or a one-item list);
        passing more than one column triggers ``RaiseIt.length_error``
    :param dtypes: type or types to test against, resolved through
        ``parse_spark_dtypes``
    :return: ``True`` if the column's ``dataType`` is an instance of any of
        the resolved types, ``False`` otherwise
    """
    columns = val_to_list(column)
    if len(columns) > 1:
        RaiseIt.length_error(columns, 1)

    accepted_types = tuple(val_to_list(parse_spark_dtypes(dtypes)))
    column_name = one_list_to_val(columns)

    # Look the column up in the schema and test its declared data type.
    schema_field = df.schema[column_name]
    return isinstance(schema_field.dataType, accepted_types)
def get_output_cols(input_cols, output_cols):
    """
    Construct the output column names given the input column names.

    :param input_cols: input column name or list of input column names
    :param output_cols: output column name, list of names, or ``None``
    :return: the resolved output column names
    """
    list_inputs = is_list(input_cols)

    if list_inputs and is_list(output_cols):
        # Explicit one-to-one naming: both lists must have the same length.
        if len(input_cols) != len(output_cols):
            RaiseIt.length_error(input_cols, output_cols)
        resolved = output_cols
    elif list_inputs and is_str(output_cols):
        if len(input_cols) <= 1:
            resolved = val_to_list(output_cols)
        else:
            # The single string is appended to each input column name.
            suffix = output_cols
            resolved = [col + suffix for col in input_cols]
    elif is_str(input_cols) and is_str(output_cols):
        resolved = val_to_list(output_cols)
    elif output_cols is None:
        # Nothing requested: the outputs reuse the input names.
        resolved = input_cols
    else:
        resolved = output_cols

    return resolved