import time

import tensorflow as tf
import numpy as np
#import matplotlib.pyplot as plt
import zarr
from zarr import blosc

# The Blosc thread count is configured here, between the imports, to keep the
# original statement order.
blosc.set_nthreads(18)

from tqdm import tqdm
from ord_endo_tf import ORd

# Initial conditions, one assignment per state variable.
# NOTE(review): the names presumably mirror the state variables of the ORd
# model imported above -- confirm against ord_endo_tf.
v = -87
nai = 7
nass = 7
ki = 150
kass = 150

cai = 1.0e-4
cass = 1.0e-4
cansr = 1.2
cajsr = 1.2
m = 0

hf = 1
hs = 1
j = 1
hsp = 1
jp = 1

mL = 0
hL = 1
hLp = 1
a = 0
iF = 1

iS = 1
ap = 0
iFp = 1
iSp = 1
d = 0

ff = 1
fs = 1
fcaf = 1
fcas = 1
jca = 1

nca = 0
ffp = 1
fcafp = 1
xrf = 0
xrs = 0

xs1 = 0
xs2 = 0
xk1 = 1
Jrelnp = 0
Jrelp = 0

CaMKt = 0

# The 41 initial values collected into a single list, in the order in which
# they are assigned above.
y0 = [
    v, nai, nass, ki, kass,
    cai, cass, cansr, cajsr, m,
    hf, hs, j, hsp, jp,
    mL, hL, hLp, a, iF,
    iS, ap, iFp, iSp, d,
    ff, fs, fcaf, fcas, jca,
    nca, ffp, fcafp, xrf, xrs,
    xs1, xs2, xk1, Jrelnp, Jrelp,
    CaMKt,
]

# Population size: the number of models to train.
pop_size = 100000
# Example of writing to a zarr file from multiple processes using the
# standard multiprocessing library.
# Discussion and source code: https://github.com/zarr-developers/zarr-python/issues/199
# Block / hang discussion: https://github.com/zarr-developers/numcodecs/issues/41
# Using Dask for multi-threading: https://clouds.eos.ubc.ca/~phil/courses/parallel_python/03_dask_and_zarr.html
import zarr
import numpy as np
from pprint import pprint
import multiprocessing
from zarr import blosc

blosc.set_nthreads(20)
# This must be set to False to prevent locking in the blosc context.
blosc.use_threads = False

# Look into the synchronized zarr API: https://zarr.readthedocs.io/en/stable/api/sync.html
synchronizer = zarr.ProcessSynchronizer('example.sync')
processed_zarr = zarr.hierarchy.open_group(
    "test.zarr",
    mode='a',
    synchronizer=synchronizer,
)

# Random (10000, 20) sample matrix stored as the "features_arr" dataset,
# overwriting any existing dataset of that name.
features_arr = np.random.random_sample((10000, 20))
processed_zarr.create_dataset(
    "features_arr",
    data=features_arr,
    shape=features_arr.shape,
    dtype="float64",
    overwrite=True,
)

# Row count of the stored dataset, read back from the group.
num_rows = processed_zarr["features_arr"].shape[0]

# Every row index, plus 100 evenly spaced int32 offsets from 0 to the last
# row index.
ixs = np.arange(num_rows)
slices = np.linspace(0, num_rows - 1, 100, dtype=np.int32)
import sys

# Make the current directory and the shared helpers importable before the
# project-local imports below.
sys.path.insert(0, '.')
sys.path.insert(0, '../shared')

import time
from typing import List, IO

import psutil
import numpy as np
import zarr
from zarr.hierarchy import Group
from zarr import blosc

from utilities import MAX_POSITIONS, NUM_SAMPLES, PLINK_PREF, ZARR_DB
from ch2 import conv_chrom, encode_alleles

blosc.set_nthreads(1)
# Handle on the current process, used to sample CPU times around each run.
proc = psutil.Process()

# Tab-separated header for the benchmark table.
print('block_size\tnum_threads\twall_time\tuser_time\tio_wait')
for block_size in [
        1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384
]:
    for num_threads in [1, 2, 4, 8]:
        # Open 'ignore.zarr' with mode='w' before every measurement; this
        # happens outside the timed region.
        root = zarr.open('ignore.zarr', mode='w')
        blosc.set_nthreads(num_threads)
        start_cpu = proc.cpu_times()
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        start_time = time.perf_counter()
        conv_chrom(f'{PLINK_PREF}.tped', block_size, MAX_POSITIONS, root, 1)
        end_cpu = proc.cpu_times()
        end_time = time.perf_counter()
        # Elapsed wall-clock seconds for this (block_size, num_threads) pair.
        wall_time = end_time - start_time
import sys

import zarr
from zarr import blosc

# The shared helper modules live one directory up.
sys.path.insert(0, '../shared')

from utilities import PLINK_PREF
from ch2 import conv_chrom, encode_alleles_tuple

MAX_POSITIONS = 100000
# The block size is taken from the first command-line argument.
BLOCK_SIZE = int(sys.argv[1])

blosc.set_nthreads(1)
root = zarr.open('ignore.zarr', mode='w')

# Single conversion run using the tuple-based allele encoding.
tped_path = f'{PLINK_PREF}.tped'
conv_chrom(
    tped_path,
    BLOCK_SIZE,
    MAX_POSITIONS,
    root,
    1,
    encode_alleles=encode_alleles_tuple,
    encode_fun=tuple,
)
# memory size
# number of runs - we are doing only 1
config['clevel'] = self.clevel config['shuffle'] = self.shuffle return config def __repr__(self): r = '%s(cname=%r, clevel=%r, shuffle=%r)' % \ (type(self).__name__, text_type(self.cname, 'ascii'), self.clevel, self.shuffle) return r codec_registry[Blosc.codec_id] = Blosc # initialize blosc ncores = multiprocessing.cpu_count() blosc.init() blosc.set_nthreads(min(8, ncores)) atexit.register(blosc.destroy) def _ndarray_from_buffer(buf, dtype): if isinstance(buf, np.ndarray): arr = buf.reshape(-1, order='A').view(dtype) else: arr = np.frombuffer(buf, dtype=dtype) return arr class Delta(Codec): """Filter to encode data as the difference between adjacent values. Parameters