def test_datetime_units() -> None:
    """Excluding a Datetime dtype by time unit drops exactly that column."""
    frame = pl.DataFrame(
        {
            "ns": pl.date_range(
                datetime(2020, 1, 1), datetime(2020, 5, 1), "1mo", time_unit="ns"
            ),
            "us": pl.date_range(
                datetime(2020, 1, 1), datetime(2020, 5, 1), "1mo", time_unit="us"
            ),
            "ms": pl.date_range(
                datetime(2020, 1, 1), datetime(2020, 5, 1), "1mo", time_unit="ms"
            ),
        }
    )
    all_names = set(frame.columns)
    for unit in DTYPE_TEMPORAL_UNITS:
        survivors = set(
            frame.select([pl.all().exclude(pl.Datetime(unit))]).columns
        )
        # Everything except the excluded unit's column must remain.
        assert survivors <= all_names - {unit}
| (pl.col('p_vals_isoform').str.lengths() > 0) ).with_columns([ pl.when( pl.col('p_vals_expression').str.lengths() == 0).then(None).otherwise( pl.col('p_vals_expression')).alias('p_vals_expression'), pl.when( pl.col('p_vals_expression').str.lengths() == 0).then(None).otherwise( pl.col('associations (tissue:target)_expression')).alias( 'associations (tissue:target)_expression'), pl.when( pl.col('p_vals_expression').str.lengths() == 0).then(None).otherwise( pl.col('n_tests_expression')).alias('n_tests_expression'), pl.when(pl.col('p_vals_splice').str.lengths() == 0).then(None).otherwise( pl.col('p_vals_splice')).alias('p_vals_splice'), pl.when(pl.col('p_vals_splice').str.lengths() == 0).then(None).otherwise( pl.col('associations (tissue:target)_splice')).alias( 'associations (tissue:target)_splice'), pl.when(pl.col('p_vals_splice').str.lengths() == 0).then(None).otherwise( pl.col('n_tests_splice')).alias('n_tests_splice'), pl.when(pl.col('p_vals_isoform').str.lengths() == 0).then(None).otherwise( pl.col('p_vals_isoform')).alias('p_vals_isoform'), pl.when(pl.col('p_vals_isoform').str.lengths() == 0).then(None).otherwise( pl.col('associations (tissue:target)_isoform')).alias( 'associations (tissue:target)_isoform'), pl.when(pl.col('p_vals_isoform').str.lengths() == 0).then(None).otherwise( pl.col('n_tests_isoform')).alias('n_tests_isoform'), ]).sort('chrom_pos').select(['chrom_pos', pl.all().exclude('^chrom_pos$')]) total_qtl_str.to_pandas().to_csv('blessed_qtl_STRs.tab', sep='\t', index=False)
def df_no_lists(df: pl.DataFrame) -> pl.DataFrame:
    """Return *df* with all list-typed columns dropped.

    Fix: the exclude list previously contained "list_int" twice; the duplicate
    literal is removed (exclusion behavior is unchanged).
    """
    return df.select(
        pl.all().exclude(["list_str", "list_int", "list_bool", "list_flt"])
    )
def test_all_expr() -> None:
    """Selecting pl.all() must reproduce the frame unchanged.

    Fixes: adds the missing ``-> None`` return annotation used by the other
    tests in this file, and replaces the deprecated ``df[[expr]]`` indexing
    with the idiomatic ``df.select``.
    """
    df = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    assert df.select([pl.all()]).frame_equal(df)
def test_apply_return_py_object() -> None:
    """A map() callback returning a plain Python object yields one row per column."""
    df = pl.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    summed = df.select(pl.all().map(lambda s: reduce(lambda acc, item: acc + item, s)))
    assert summed.shape == (1, 2)
def test_cat_to_pandas() -> None:
    """Categorical columns convert to pandas 'category' dtype."""
    frame = pl.DataFrame({"a": ["best", "test"]})
    frame = frame.with_columns(pl.all().cast(pl.Categorical))
    pandas_frame = frame.to_pandas()
    assert "category" in str(pandas_frame["a"].dtype)
def test_prefix(fruits_cars: pl.DataFrame) -> None:
    """prefix() prepends the given string to every selected column name."""
    prefixed = fruits_cars.select([pl.all().prefix("reverse_")])
    assert prefixed.columns == [
        "reverse_A",
        "reverse_fruits",
        "reverse_B",
        "reverse_cars",
    ]
def test_suffix(fruits_cars: pl.DataFrame) -> None:
    """suffix() appends the given string to every selected column name."""
    suffixed = fruits_cars.select([pl.all().suffix("_reverse")])
    assert suffixed.columns == [
        "A_reverse",
        "fruits_reverse",
        "B_reverse",
        "cars_reverse",
    ]
def test_exclude_selection() -> None:
    """exclude() accepts a column name, a single dtype, or a list of dtypes."""
    lf = pl.DataFrame({"a": [1], "b": [1], "c": [True]}).lazy()
    assert lf.select([pl.exclude("a")]).columns == ["b", "c"]
    # Single dtype and a list containing it behave identically.
    for dtype_spec in (pl.Boolean, [pl.Boolean]):
        assert lf.select(pl.all().exclude(dtype_spec)).columns == ["a", "b"]
def test_all_expr() -> None:
    """Selecting pl.all() reproduces the frame unchanged."""
    source = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    selected = source.select([pl.all()])
    assert selected.frame_equal(source)
def __init__(self, path=None, string=None):
    """
    Parser of the cpt file.

    Parameters
    ----------
    path: str
        Path to the *.gef file.
    string: str
        String version of the *.gef file.

    Raises
    ------
    ValueError
        If the parsed gef file is not of type "cpt".
    """
    super().__init__(path=path, string=string)
    if not self.type == "cpt":
        raise ValueError(
            "The selected gef file is not a cpt. "
            "Check the REPORTCODE or the PROCEDURECODE."
        )

    self.project_id = utils.parse_project_type(self._headers, "cpt")
    self.cone_id = utils.parse_cone_id(self._headers)
    self.cpt_class = utils.parse_cpt_class(self._headers)
    self.column_void = utils.parse_column_void(self._headers)

    # Every measurement variable below is parsed with the same helper, so
    # drive the assignments from an attribute -> measurement-var-number table
    # instead of ~35 near-identical statements.  Dict insertion order is
    # preserved, so pre_excavated_depth is set before it is used further down.
    measurement_vars = {
        "nom_surface_area_cone_tip": 1,
        "nom_surface_area_friction_element": 2,
        "net_surface_area_quotient_of_the_cone_tip": 3,
        "net_surface_area_quotient_of_the_friction_casing": 4,
        "distance_between_cone_and_centre_of_friction_casing": 5,
        "friction_present": 6,
        "ppt_u1_present": 7,
        "ppt_u2_present": 8,
        "ppt_u3_present": 9,
        "inclination_measurement_present": 10,
        "use_of_back_flow_compensator": 11,
        "type_of_cone_penetration_test": 12,
        "pre_excavated_depth": 13,
        "groundwater_level": 14,
        "water_depth_offshore_activities": 15,
        "end_depth_of_penetration_test": 16,
        "stop_criteria": 17,
        "zero_measurement_cone_before_penetration_test": 20,
        "zero_measurement_cone_after_penetration_test": 21,
        "zero_measurement_friction_before_penetration_test": 22,
        "zero_measurement_friction_after_penetration_test": 23,
        "zero_measurement_ppt_u1_before_penetration_test": 24,
        "zero_measurement_ppt_u1_after_penetration_test": 25,
        "zero_measurement_ppt_u2_before_penetration_test": 26,
        "zero_measurement_ppt_u2_after_penetration_test": 27,
        "zero_measurement_ppt_u3_before_penetration_test": 28,
        "zero_measurement_ppt_u3_after_penetration_test": 29,
        "zero_measurement_inclination_before_penetration_test": 30,
        "zero_measurement_inclination_after_penetration_test": 31,
        "zero_measurement_inclination_ns_before_penetration_test": 32,
        "zero_measurement_inclination_ns_after_penetration_test": 33,
        "zero_measurement_inclination_ew_before_penetration_test": 34,
        "zero_measurement_inclination_ew_after_penetration_test": 35,
        "mileage": 41,
    }
    for attribute, var_number in measurement_vars.items():
        setattr(
            self,
            attribute,
            utils.parse_measurement_var_as_float(self._headers, var_number),
        )

    column_names = determine_column_names(self._headers)
    self.df = (
        self.parse_data(self._headers, self._data, column_names)
        .lazy()
        .pipe(replace_column_void, self.column_void)
        .pipe(correct_pre_excavated_depth, self.pre_excavated_depth)
        .with_column(correct_depth_with_inclination(column_names))
        .select(
            # Remove None values since they throw an error
            [
                expr
                for expr in [
                    pl.all().exclude(["depth", "friction_number"]),
                    col("depth").abs(),
                    calculate_friction_number(column_names),
                    self.calculate_elevation_with_respect_to_nap(
                        self.zid, self.height_system
                    ),
                ]
                if expr is not None
            ]
        )
        .collect()
    )
def test_null_count_expr() -> None:
    """null_count() reports the number of null entries per column."""
    frame = pl.DataFrame({"key": ["a", "b", "b", "a"], "val": [1, 2, None, 1]})
    counts = frame.select([pl.all().null_count()])
    assert counts.to_dict(False) == {"key": [0], "val": [1]}