def test_bed_int8(tmp_path, shared_datadir):
    """Read the reference file as int8 in both memory orders and round-trip it.

    For every combination of ``force_python_only`` and ``order`` the test
    checks the dtype and memory layout of the array returned by ``read``,
    compares it with the reference values (NaN replaced by -127), and then
    writes and re-reads those values for both settings of ``count_A1``.
    """
    out_path = str(tmp_path / "int8.bed")
    with open_bed(shared_datadir / "distributed_bed_test1_X.bed") as bed:
        for force_python_only in (False, True):
            for order in ("F", "C"):
                val = bed.read(
                    dtype="int8",
                    force_python_only=force_python_only,
                    order=order,
                )
                assert val.dtype == np.int8
                # The array must be contiguous in the order that was requested.
                assert val.flags[f"{order}_CONTIGUOUS"]

                expected = reference_val(shared_datadir)
                # NaN is the only value that differs from itself; those
                # entries are compared against -127 in the int8 result.
                expected[expected != expected] = -127
                expected = expected.astype("int8")
                assert np.array_equal(expected, val)

                for count_A1 in (False, True):
                    open_bed.write(
                        out_path,
                        expected,
                        count_A1=count_A1,
                        force_python_only=force_python_only,
                    )
                    with open_bed(out_path, count_A1=count_A1) as bed2:
                        reread = bed2.read(
                            dtype="int8", force_python_only=force_python_only
                        )
                        assert np.array_equal(reread, expected)
def test_write12(tmp_path):
    """Write small (including empty) matrices and read them back.

    Random 0/1/2/missing matrices of several row/column counts are written
    once with per-column metadata (chromosome, bp_position, cm_position) and
    once without. Each file is then read back both in full and through a
    strided subset, and the values and metadata are compared with what was
    written.
    """
    logging.info("starting 'test_writes'")
    np.random.seed(0)  # fixed seed keeps the generated matrices reproducible
    output_template = str(tmp_path / "writes.{0}.bed")
    all_ids = ["0", "1", "2", "3", "4"]
    all_sids = ["s0", "s1", "s2", "s3", "s4"]
    file_index = 0
    for row_count in [0, 5, 2, 1]:
        for col_count in [4, 2, 1, 0]:
            val = np.random.randint(0, 4, size=(row_count, col_count)) * 1.0
            # A drawn 3 stands for "missing". `np.nan` is used because the
            # `np.NaN` alias was removed in NumPy 2.0.
            val[val == 3] = np.nan
            row0 = all_ids[:row_count]
            row1 = all_ids[:row_count]
            col = all_sids[:col_count]
            for with_col_props in [True, False]:
                metadata = {"fid": row0, "iid": row1, "sid": col}
                if with_col_props:
                    col_prop012 = list(range(5))[:col_count]
                    metadata["chromosome"] = col_prop012
                    metadata["bp_position"] = col_prop012
                    metadata["cm_position"] = col_prop012
                else:
                    col_prop012 = None

                filename = output_template.format(file_index)
                logging.info(filename)
                file_index += 1
                # TODO(cmk): is it weird to call "open_bed.write"?
                open_bed.write(filename, val, metadata=metadata)

                for subsetter in [None, np.s_[::2, ::3]]:
                    with open_bed(filename) as bed:
                        # TODO(cmk): should float32 be the default so that
                        # NaN is handled better?
                        val2 = bed.read(
                            index=subsetter, order="C", dtype="float32"
                        )
                        if subsetter is None:
                            expected = val
                        else:
                            expected = val[subsetter[0], :][:, subsetter[1]]
                        assert np.allclose(val2, expected, equal_nan=True)
                        assert np.array_equal(bed.fid, np.array(row0, dtype="str"))
                        assert np.array_equal(bed.iid, np.array(row1, dtype="str"))
                        assert np.array_equal(bed.sid, np.array(col, dtype="str"))
                        if col_prop012 is not None:
                            assert np.array_equal(
                                bed.chromosome,
                                np.array(col_prop012, dtype="str"),
                            )
                            assert np.array_equal(
                                bed.bp_position, np.array(col_prop012)
                            )
                            assert np.array_equal(
                                bed.cm_position, np.array(col_prop012)
                            )

                # Best-effort cleanup: only file-system errors are ignored, so
                # KeyboardInterrupt and SystemExit still propagate.
                try:
                    os.remove(filename)
                except OSError:
                    pass
    logging.info("done with 'test_writes'")
def test_write1_bed_f64cpp(tmp_path, shared_datadir):
    """Round-trip the first 0, 1 and 5 rows of the reference file as float64.

    Each slice is read in Fortran order (with and without
    ``force_python_only``), written with ``count_A1=False`` and read back;
    the re-read values must match, treating NaN as equal to NaN.
    """
    with open_bed(shared_datadir / "distributed_bed_test1_X.bed") as bed:
        for iid_index in [0, 1, 5]:
            for force_python_only in [False, True]:
                val = bed.read(
                    np.s_[0:iid_index, :],
                    order="F",
                    dtype=np.float64,
                    force_python_only=force_python_only,
                )
                assert val.shape == (iid_index, 100)

                output = str(tmp_path / f"toydata.F64cpp.{iid_index}")
                open_bed.write(output, val, count_A1=False)
                # Use a context manager so the reader is closed before the
                # same path is written again on the next iteration.
                with open_bed(output, count_A1=False) as bed2:
                    val2 = bed2.read(dtype="float64")
                assert np.allclose(val, val2, equal_nan=True)
def test_zero_files(tmp_path):
    """Round-trip matrices that may have zero rows and/or zero columns.

    For each dtype and shape the matrix is written and read back, the
    returned metadata is checked for plausible lengths, the metadata is then
    modified and written again, and a second read verifies that both the
    values and the modified metadata survive.
    """
    filename = str(tmp_path / "zero_files.bed")
    for force_python_only in (False, True):
        for iid_count in (3, 0):
            for sid_count in (5, 0):
                for dtype in (np.int8, np.float32, np.float64):
                    val = np.zeros((iid_count, sid_count), dtype=dtype)
                    if iid_count > 0 and sid_count > 0:
                        # Seed one ordinary value and one missing value.
                        missing = -127 if np.dtype(dtype) == np.int8 else np.nan
                        val[0, 0] = 2
                        val[0, 1] = missing

                    # First write/read cycle, without explicit metadata.
                    open_bed.write(
                        filename, val, force_python_only=force_python_only
                    )
                    with open_bed(filename) as bed2:
                        val2 = bed2.read(dtype=dtype)
                        assert np.allclose(val, val2, equal_nan=True)
                        metadata2 = bed2.metadata
                        for prop in metadata2.values():
                            assert len(prop) in {iid_count, sid_count}

                    # Change the metadata and write again.
                    if iid_count > 0:
                        metadata2["iid"][0] = "iidx"
                    if sid_count > 0:
                        metadata2["sid"][0] = "sidx"
                    open_bed.write(
                        filename,
                        val2,
                        metadata=metadata2,
                        force_python_only=force_python_only,
                    )

                    # Second read: values and edited metadata must match.
                    with open_bed(filename) as bed3:
                        val3 = bed3.read(dtype=dtype)
                        assert np.allclose(val, val3, equal_nan=True)
                        metadata3 = bed3.metadata
                        for key, written in metadata2.items():
                            assert np.array_equal(written, metadata3[key])
def test_write1_x_x_cpp(tmp_path, shared_datadir):
    """Round-trip the reference file across orders, float dtypes and count_A1.

    The full matrix is read with each ``order``/``dtype`` combination, one
    entry is set to NaN, and the matrix is written together with the source
    file's metadata. Reading the written file back must reproduce the values,
    treating NaN as equal to NaN.
    """
    for count_A1 in [False, True]:
        with open_bed(
            shared_datadir / "distributed_bed_test1_X.bed", count_A1=count_A1
        ) as bed:
            for order in ["C", "F", "A"]:
                for dtype in [np.float32, np.float64]:
                    val = bed.read(order=order, dtype=dtype)
                    metadata = bed.metadata
                    val[-1, 0] = float("NAN")  # force one missing value

                    bits = "32" if dtype == np.float32 else "64"
                    output = str(tmp_path / f"toydata.{order}{bits}.cpp")
                    open_bed.write(
                        output, val, metadata=metadata, count_A1=count_A1
                    )
                    # The same path is rewritten when count_A1 changes, so the
                    # reader is closed explicitly via a context manager.
                    with open_bed(output, count_A1=count_A1) as bed2:
                        val2 = bed2.read(dtype=dtype)
                    assert np.allclose(val, val2, equal_nan=True)
def test_coverage2(shared_datadir):
    """Exercise metadata handling and input validation paths.

    Checks that ``iid`` is still available when ``metadata={"iid": None}`` is
    passed, and that ``ValueError`` is raised for metadata lists of differing
    lengths, for a value array that is neither C- nor F-contiguous, and for a
    value array with a string dtype.
    """
    bed_path = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"

    with open_bed(bed_path, metadata={"iid": None}) as bed:
        assert len(bed.iid) > 1

    # Three iids but two mothers: presumably rejected for inconsistent lengths.
    with pytest.raises(ValueError):
        open_bed(
            bed_path,
            metadata={"iid": [1, 2, 3], "mother": [1, 2]},
        )

    # Taking every other row yields an array that is not contiguous either way.
    val = np.zeros((3, 5))[::2]
    assert not val.flags["C_CONTIGUOUS"] and not val.flags["F_CONTIGUOUS"]
    with pytest.raises(ValueError):
        open_bed.write("ignore", val)

    # `np.str` was only an alias of the builtin `str` and was removed in
    # NumPy 1.24; the builtin produces the same string-dtype array.
    val = np.zeros((3, 5), dtype=str)
    with pytest.raises(ValueError):
        open_bed.write("ignore", val)
def test_write1(tmp_path, shared_datadir):
    """Write values plus metadata, verify the round trip, then reject bad values.

    The values and metadata read from the input file are written to a new
    file, and the new file's values and metadata are compared with the
    originals. Writing is then expected to raise ``ValueError`` for a float
    array containing 0.5 and for an int8 array containing -1.
    """
    in_file = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"
    out_file = tmp_path / "out.bed"
    with open_bed(in_file) as bed:
        val0 = bed.read()
        metadata0 = {
            "fid": bed.fid,
            "iid": bed.iid,
            "sid": bed.sid,
            "chromosome": bed.chromosome,
            "cm_position": bed.cm_position,
            "bp_position": bed.bp_position,
        }
        open_bed.write(out_file, val0, metadata=metadata0)

        with open_bed(out_file) as bed1:
            assert np.allclose(val0, bed1.read(), equal_nan=True)
            # Check the re-read file (bed1). Comparing `bed` with metadata0
            # would be vacuous because metadata0 was built from `bed`.
            assert np.array_equal(bed1.fid, metadata0["fid"])
            assert np.array_equal(bed1.iid, metadata0["iid"])
            assert np.array_equal(bed1.sid, metadata0["sid"])
            assert np.array_equal(bed1.chromosome, metadata0["chromosome"])
            assert np.allclose(bed1.cm_position, metadata0["cm_position"])
            assert np.allclose(bed1.bp_position, metadata0["bp_position"])

        # A fractional value must be rejected by the writer.
        val_float = val0.astype("float")
        val_float[0, 0] = 0.5
        for force_python_only in [False, True]:
            with pytest.raises(ValueError):
                open_bed.write(
                    out_file,
                    val_float,
                    metadata=metadata0,
                    force_python_only=force_python_only,
                )

        # So must the int8 value -1.
        val_int8 = val0.astype("int8")
        val_int8[0, 0] = -1
        for force_python_only in [False, True]:
            with pytest.raises(ValueError):
                open_bed.write(
                    out_file,
                    val_int8,
                    metadata=metadata0,
                    force_python_only=force_python_only,
                )