def test_fastavro_compatibility_deserialize(
    schema_root: str,
    schema_identifier: str,
    schemata: cerializer.schemata.CerializerSchemata,
) -> None:
    """Round-trip check: payloads written by fastavro must deserialize to equal
    values through the Cerializer codec generated for the same schema.

    :param schema_root: directory containing ``example.yaml`` and ``schema.yaml``
    :param schema_identifier: dotted ``namespace.schema_name`` identifier
    :param schemata: pre-built schemata registry used to construct the codec
    """
    # patch for not working avro codec
    cerializer.tests.dev_utils.init_fastavro()
    namespace = schema_identifier.split('.')[0]
    schema_name = schema_identifier.split('.')[1]
    cerializer_codec = cerializer.cerializer.Cerializer(
        cerializer_schemata=schemata,
        namespace=namespace,
        schema_name=schema_name,
    )
    # Keep the try narrow: only the file opens can raise FileNotFoundError.
    # The original left both files unclosed; use context managers instead.
    try:
        with open(os.path.join(schema_root, 'example.yaml')) as example_file:
            # mypy thinks yaml has no attribute unsafe_load_all, which is not true
            data_all = list(yaml.unsafe_load_all(example_file))  # type: ignore
        with open(os.path.join(schema_root, 'schema.yaml')) as schema_file:
            SCHEMA_FAVRO = yaml.load(schema_file, Loader=yaml.Loader)
    except FileNotFoundError:
        logging.warning(
            'Missing schema or Example file for schema == %s',
            schema_name,
        )
        # `assert False` is stripped under `python -O`; raise explicitly so the
        # test still fails in optimized runs.
        raise AssertionError(f'missing example or schema file for schema == {schema_name}')
    for data in data_all:
        output_fastavro = io.BytesIO()
        fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
        deserialized = cerializer_codec.deserialize(output_fastavro.getvalue())
        output_fastavro.seek(0)
        assert deserialized == fastavro.schemaless_reader(output_fastavro, SCHEMA_FAVRO)
def loadHDF5(fn, **kwargs):
    """Load a Picasso-style HDF5 localization file into a Dataset.

    Reads the ``locs`` table from *fn* and the accompanying ``<fn>.yaml`` info
    file (for image Height/Width), then copies positions, photon counts,
    background, CRLB values and optional per-spot sigma/chi-square/group
    columns into a new Dataset.

    :param fn: path to the ``.hdf5`` localization file
    :param kwargs: forwarded to the Dataset constructor
    :return: populated Dataset; the source path is stored under ``'locs_path'``
    """
    with h5py.File(fn, 'r') as f:
        locs = f['locs'][:]

    info_fn = os.path.splitext(fn)[0] + ".yaml"
    with open(info_fn, "r") as file:
        # BUG FIX: the guard previously probed `unsafe_load` but then called
        # `unsafe_load_all`; test for the attribute we actually use.
        # (PyYAML < 5.1 has neither; there `load_all` is the unsafe loader.)
        if hasattr(yaml, 'unsafe_load_all'):
            obj = yaml.unsafe_load_all(file)
        else:
            obj = yaml.load_all(file)
        # load_all is lazy, so materialize the first document while the
        # file is still open.
        obj = list(obj)[0]

    imgshape = np.array([obj['Height'], obj['Width']])

    # 3D data is signalled by the presence of a 'z' column.
    if 'z' in locs.dtype.fields:
        dims = 3
    else:
        dims = 2

    ds = Dataset(len(locs), dims, imgshape, haveSigma='sx' in locs.dtype.fields, **kwargs)
    ds.photons[:] = locs['photons']
    ds.background[:] = locs['bg']
    ds.pos[:, 0] = locs['x']
    ds.pos[:, 1] = locs['y']
    if dims == 3:
        ds.pos[:, 2] = locs['z']
        ds.crlb.pos[:, 2] = locs['lpz']
    ds.crlb.pos[:, 0] = locs['lpx']
    ds.crlb.pos[:, 1] = locs['lpy']
    ds.crlb.photons = locs['lI']
    ds.crlb.bg = locs['lbg']
    if 'lsx' in locs.dtype.fields:
        # picasso doesn't save crlb for the sigma fits
        ds.crlb.sigma[:, 0] = locs['lsx']
        ds.crlb.sigma[:, 1] = locs['lsy']
    if ds.hasPerSpotSigma():
        ds.data.estim.sigma[:, 0] = locs['sx']
        ds.data.estim.sigma[:, 1] = locs['sy']
    ds.frame[:] = locs['frame']
    if 'chisq' in locs.dtype.fields:
        ds.data.chisq = locs['chisq']
    if 'group' in locs.dtype.fields:
        ds.data.group = locs['group']
    ds['locs_path'] = fn
    return ds
def test_load_yaml_config_raises_error_if_unsafe_yaml():
    """Test error raised if unsafe YAML."""
    payload = "- !!python/object/apply:os.system []"
    with open(YAML_PATH, "w") as config_file:
        config_file.write(payload)

    # The safe loader must refuse to execute the payload: os.system is mocked
    # and must never be invoked while loading through config_util.
    with patch.object(os, "system") as system_mock:
        with contextlib.suppress(HomeAssistantError):
            config_util.load_yaml_config_file(YAML_PATH)
    assert not system_mock.mock_calls

    # Here we validate that the test above is a good test
    # since previously the syntax was not valid
    with patch.object(os, "system") as system_mock:
        with open(YAML_PATH) as config_file:
            list(yaml.unsafe_load_all(config_file))
    assert len(system_mock.mock_calls) == 1
def benchmark(number: int = 1000, preheat_number: int = 10) -> str:
    """Benchmark Cerializer serialize+deserialize round trips against fastavro
    and the reference avro library (and, nominally, plain JSON) for every
    schema yielded by ``cerializer.utils.iterate_over_schemata``.

    :param number: total timeit iterations per codec (run as two halves to
        interleave the codecs and reduce ordering bias)
    :param preheat_number: warm-up round trips executed in the timeit setup
    :return: formatted report; each row holds per-codec times normalized to
        the slowest codec for that schema
    """
    results = []
    report = []
    report_json = []
    for schema_identifier, path in tqdm.tqdm(list(
            cerializer.utils.iterate_over_schemata()), desc='Benchmarking'):
        # NOTE: the triple-quoted strings below are *code executed by timeit*;
        # their exact content is behavior — do not reformat or comment inside.
        setup = f'''
import cerializer.tests.dev_utils as t
import cerializer.tests.benchmark as b
import yaml
import cerializer.cerializer
import fastavro
import os
import io
import json
from __main__ import schemata as CERIALIZER_SCHEMATA
t.init_fastavro()
schema_identifier = '{schema_identifier}'
#CERIALIZER_SCHEMATA = b.schemata()
cerializer_codec = cerializer.cerializer.Cerializer(
    cerializer_schemata = CERIALIZER_SCHEMATA,
    namespace = schema_identifier.split('.')[0],
    schema_name = schema_identifier.split('.')[1]
)
data = list(yaml.unsafe_load_all(open(os.path.join('{path}', 'example.yaml'))))[0] # type: ignore
SCHEMA_FAVRO = fastavro.parse_schema(
    yaml.load(open(os.path.join('{path}', 'schema.yaml')), Loader = yaml.Loader)
)
for i in range({preheat_number}):
    output_cerializer = cerializer_codec.serialize(data)
    deserialized_data = cerializer_codec.deserialize(output_cerializer)
    output_fastavro = io.BytesIO()
    fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
    x = output_fastavro.getvalue()
    output_fastavro.seek(0)
    res = fastavro.schemaless_reader(output_fastavro, SCHEMA_FAVRO)
import io
import json
import yaml
import avro
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import datetime
schema = yaml.load(open(os.path.join('{path}', 'schema.yaml')), Loader = yaml.Loader)
try:
    data_avro = yaml.load(open(os.path.join('{path}', 'example_avro.yaml')), Loader = yaml.Loader)
except:
    data_avro = data
parsed_schema = avro.schema.parse(json.dumps(schema))
'''
        # One serialize+deserialize round trip per codec:
        stmt_avro = '''
output_avro = io.BytesIO()
writer = avro.io.DatumWriter(parsed_schema)
encoder = avro.io.BinaryEncoder(output_avro)
writer.write(data_avro, encoder)
raw_bytes = output_avro.getvalue()
bytes_reader = io.BytesIO(raw_bytes)
decoder = avro.io.BinaryDecoder(bytes_reader)
reader = avro.io.DatumReader(parsed_schema)
data_deserialized = reader.read(decoder)
'''
        stmt_cerializer = '''
output_cerializer = cerializer_codec.serialize(data)
deserialized_data = cerializer_codec.deserialize(output_cerializer)
'''
        stmt_fastavro = '''
output_fastavro = io.BytesIO()
fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
x = output_fastavro.getvalue()
output_fastavro.seek(0)
res = fastavro.schemaless_reader(output_fastavro, SCHEMA_FAVRO)
'''
        stmt_json = '''
json_encoded = json.dumps(data)
json_decoded = json.loads(json_encoded)
'''
        # we do this since we do first one half and the second half
        half_number = int(number / 2)
        result_cerializer_1 = timeit.timeit(stmt=stmt_cerializer, setup=setup, number=half_number)
        result_fastavro_1 = timeit.timeit(stmt=stmt_fastavro, setup=setup, number=half_number)
        result_avro_1 = timeit.timeit(stmt=stmt_avro, setup=setup, number=half_number)
        result_fastavro_2 = timeit.timeit(stmt=stmt_fastavro, setup=setup, number=half_number)
        result_avro_2 = timeit.timeit(stmt=stmt_avro, setup=setup, number=half_number)
        result_cerializer_2 = timeit.timeit(stmt=stmt_cerializer, setup=setup, number=half_number)
        # Re-load the example data locally to probe JSON serializability.
        data = list(
            yaml.unsafe_load_all(open(os.path.join(path, 'example.yaml'))))[0]
        try:
            import json
            # this will fail if the data has components that are not JSON serializable such as datetime
            serialized = json.dumps(data)
            # NOTE(review): `0 if True else ...` never runs the JSON timing, so
            # result_json is always 0 and report_json stays empty — looks like a
            # deliberate disable switch, but confirm before relying on the JSON
            # section of the report.
            result_json_1 = 0 if True else timeit.timeit(
                stmt=stmt_json, setup=setup, number=half_number)
            result_json_2 = 0 if True else timeit.timeit(
                stmt=stmt_json, setup=setup, number=half_number)
            result_json = result_json_1 + result_json_2
        # NOTE(review): bare except silently treats *any* failure as
        # "not JSON serializable" — consider narrowing to TypeError/ValueError.
        except:
            result_json = 0
        result_cerializer = result_cerializer_1 + result_cerializer_2
        result_fastavro = result_fastavro_1 + result_fastavro_2
        result_avro = result_avro_1 + result_avro_2
        # Normalize each codec's time against the slowest one for this schema.
        maximum = max(result_cerializer, result_fastavro, result_avro)
        max_json = max(result_cerializer, result_json)
        if result_json:
            report_json.append(
                f'{schema_identifier.ljust(36, " ")},{result_cerializer/max_json},{result_json/max_json}'
            )
        results.append(
            f'{schema_identifier.ljust(36, " ")},{result_cerializer/maximum},{result_fastavro/maximum},{result_avro/maximum}'
        )
    for r in results:
        report.append(r)
    benchmark_header = '============================== BENCHMARK RESULTS =============================='
    benchmark_header_json = '=========================== BENCHMARK RESULTS JSON ============================'
    return benchmark_header + '\n' + '\n'.join(
        report) + '\n' + '\n' + benchmark_header_json + '\n' + '\n'.join(
        report_json)
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load(..) decodeFormat=YAML decodeMayExecuteInput yaml.full_load(payload) # $ decodeInput=payload decodeOutput=yaml.full_load(..) decodeFormat=YAML decodeMayExecuteInput # Safe: yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=yaml.safe_load(..) decodeFormat=YAML ################################################################################ # load_all variants ################################################################################ # Unsafe: yaml.load_all(payload) # $ decodeInput=payload decodeOutput=yaml.load_all(..) decodeFormat=YAML decodeMayExecuteInput yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load_all(..) decodeFormat=YAML decodeMayExecuteInput yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.full_load_all(..) decodeFormat=YAML decodeMayExecuteInput # Safe: yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.safe_load_all(..) decodeFormat=YAML ################################################################################ # C-based loaders with `libyaml` ################################################################################ # Unsafe: yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput # Safe: yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML