def __init__(self, url, timeout=30, utxo_cache=False, debug=False):
    self.sessionmaker = sessionmaker(bind=create_engine(
        url, connect_args={'connect_timeout': timeout}, encoding='utf8', echo=debug))
    self.address_cache = LFUCache(maxsize=16384)
    self.txid_cache = RRCache(maxsize=131072)
    self.utxo_cache = RRCache(maxsize=262144) if utxo_cache else None
    super(DatabaseIO, self).__init__(self.sessionmaker(),
                                     address_cache=self.address_cache,
                                     txid_cache=self.txid_cache,
                                     utxo_cache=self.utxo_cache)
def __init__(self, raw_slices, pmap_slices, gt_slices, patch_radius,
             batch_size, cache_size=5, p_from_cache=0.97):
    self.raw_slices = raw_slices
    self.pmap_slices = pmap_slices
    self.gt_slices = gt_slices
    self.patch_radius = patch_radius
    self.batch_size = batch_size
    self.n_slices = raw_slices.shape[0]
    self.cache = RRCache(maxsize=cache_size)
    self.p_from_cache = p_from_cache
def get_cache(cache_type, cache_size):
    caches = {
        'lfu': LFUCache(cache_size),
        'lru': LRUCache(cache_size),
        # 'rl': RLCache(cache_size),
        'rr': RRCache(cache_size),
    }
    try:
        return caches[cache_type]
    except KeyError:
        # `default` is assumed to be a fallback cache factory defined elsewhere in the module
        return default()
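A minimal usage sketch for the factory above, assuming `default()` is the module's fallback cache factory (stubbed here with an LRU cache purely for illustration):

from cachetools import LFUCache, LRUCache, RRCache

def default(cache_size=128):
    # hypothetical stand-in for the module's fallback cache factory
    return LRUCache(cache_size)

rr = get_cache('rr', 256)             # random-replacement cache with 256 entries
rr['key'] = 'value'
assert rr.maxsize == 256

fallback = get_cache('unknown', 256)  # unrecognized type falls back to default()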
class MEGSexDataset(MEGDataset, BaseDatasetSex):
    LOAD_SUFFIX = '.npy'
    cache = RRCache(10000)  # Do not cache the raw data

    @staticmethod
    @cached(cache)
    def get_features(path_to_file):
        return np.load(path_to_file[0])

    @property
    def modality_folders(self):
        return ['raw/MEG']
def main():
    cache_size = 5
    data_size = 100
    num_accesses = 10000
    std_dev = 3

    accesses, data = init(data_size, num_accesses, std_dev)

    cache = OPTCache(cache_size, accesses)
    optimal_hits, optimal_misses = run(cache, accesses, data)

    cache = RRCache(cache_size)
    cache_hits, cache_misses = run(cache, accesses, data)

    print(optimal_hits, optimal_misses)
    print(cache_hits, cache_misses)
class MEGRawRanges(MEGAgeRangesDataset):
    LOAD_SUFFIX = '.npy'
    cache = RRCache(10000)  # Do not cache the raw data

    @staticmethod
    @cached(cache)
    def get_features(path_to_file):
        return np.load(path_to_file[0])

    @property
    def modality_folders(self):
        return ['raw/MEG']

    def inputshape(self):
        # FIXME: should not have magic number, comes from assumed sample rate of 200
        return 700, self.slice_length
def test_rr(self):
    cache = RRCache(maxsize=2, choice=min)
    self.assertEqual(min, cache.choice)

    cache[1] = 1
    cache[2] = 2
    cache[3] = 3

    self.assertEqual(2, len(cache))
    self.assertEqual(2, cache[2])
    self.assertEqual(3, cache[3])
    self.assertNotIn(1, cache)

    cache[0] = 0
    self.assertEqual(2, len(cache))
    self.assertEqual(0, cache[0])
    self.assertEqual(3, cache[3])
    self.assertNotIn(2, cache)

    cache[4] = 4
    self.assertEqual(2, len(cache))
    self.assertEqual(3, cache[3])
    self.assertEqual(4, cache[4])
    self.assertNotIn(0, cache)
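For contrast with the deterministic `choice=min` used in the test above, a brief sketch of the default behaviour, where RRCache evicts a randomly chosen key once full (which key is evicted varies from run to run):

from cachetools import RRCache

rr = RRCache(maxsize=2)   # default choice is random.choice
rr['a'] = 1
rr['b'] = 2
rr['c'] = 3               # evicts either 'a' or 'b' at random
assert len(rr) == 2 and 'c' in rr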
import random

import numpy as np
import synth
from target import GENE_LABELS, GENE_VALUES
from deap import creator, base, tools
import librosa
from cachetools import cached, RRCache
from cachetools.keys import hashkey

# Define experiment settings
sr = 44100
cache = RRCache(maxsize=100)


def individual_to_params(individual):
    """Converts an individual to a dictionary of parameter values."""
    return dict(zip(GENE_LABELS, individual))


def extract_features(sound_array):
    """
    Extracts MFCC and spectral bandwidth, centroid, flatness, and roll-off.
    It seems that only MFCC features already perform quite well.
    """
    return librosa.feature.mfcc(sound_array, sr).flatten()


@cached(cache, key=lambda individual, target_features: hashkey(
def cache(self, maxsize, choice=choice, missing=None, getsizeof=None):
    return RRCache(maxsize, choice=choice,
                   missing=missing, getsizeof=getsizeof)
class BaseDataset:
    DATASET_TARGETS = [HEADER_AGE]
    NUM_BUCKETS = 5
    LOAD_SUFFIX = '.npy'  # Not sure I like this...
    GENERATOR = SubjectFileLoader

    cache = RRCache(2 * 16384)

    @staticmethod
    # @lru_cache(maxsize=8192)
    @cached(cache)
    def get_features(path_to_file):
        """
        Loads arrays from file, returned as a flattened vector; cached to save some time.

        :param path_to_file:
        :return: numpy vector
        """
        # return loadmat(path_to_file, squeeze_me=True)['features'].ravel()
        l = np.load(path_to_file[0])
        # if self.megind is not None:
        #     l = l[:, self.megind].squeeze()
        # l = zscore(l)
        return l

    @staticmethod
    def print_folds(buckets):
        for i, b in enumerate(buckets):
            b = np.array(b)
            print('Fold {0}, total datapoints: {1}'.format(i + 1, b.shape[0]))
            subjects, counts = np.unique(b[:, 0], return_counts=True)
            print('{0} Subjects used: {1}\nPoints per subject: {2}\n\n'.format(
                len(subjects), subjects, counts))

    def __init__(self, toplevel, PDK=True, PA=True, VG=True, MO=False, batchsize=2):
        self.toplevel = Path(toplevel)

        # Some basic checking to make sure we have the right directory
        if not self.toplevel.exists() or not self.toplevel.is_dir():
            raise NotADirectoryError(
                "Provided top level directory is not a directory")

        self.subject_hash = parsesubjects(self.toplevel / SUBJECT_STRUCT)
        self.batchsize = batchsize
        self.traindata = None

        # Assemble which experiments we are going to be using
        tests = []
        if PDK:
            tests.append(TEST_PDK)
        if PA:
            tests.append(TEST_PA)
        if VG:
            tests.append(TEST_VG)
        if MO:
            tests.append(TEST_MO)

        if self.preprocessed_file in [
                x.name for x in self.toplevel.iterdir() if not x.is_dir()]:
            with (self.toplevel / self.preprocessed_file).open('rb') as f:
                print('Loaded previous preprocessing!')
                self.buckets, self.longest_vector, self.slice_length,\
                    self.testpoints, self.training_subjects = pickle.load(f)
                # list of subjects that we will use for the cross validation
                # self.leaveoutsubjects = np.unique(self.datapoints[:, 0])
                # Todo: warn/update pickled file if new subjects exist
                self.print_folds(self.buckets)
        else:
            print('Preprocessing data...')
            self.training_subjects, self.longest_vector, self.slice_length = \
                self.files_to_load(tests)

            if TEST_SUBJECTS:
                print('Forcing test subjects...')
                testsubjects = TEST_SUBJECTS
            else:
                testsubjects = np.random.choice(
                    list(self.training_subjects.keys()),
                    int(len(self.training_subjects) / 10), replace=False)

            self.testpoints = np.array([
                item for x in testsubjects for item in self.training_subjects[x]])
            for subject in testsubjects:
                self.training_subjects.pop(subject)
            print('Subjects used for testing:', testsubjects)

            datapoint_ordering = sorted(
                self.training_subjects,
                key=lambda x: -len(self.training_subjects[x]))
            self.buckets = [[] for x in range(self.NUM_BUCKETS)]

            # Fill the buckets up and down
            for i in range(len(datapoint_ordering)):
                if int(i / self.NUM_BUCKETS) % 2:
                    index = self.NUM_BUCKETS - (i % self.NUM_BUCKETS) - 1
                    self.buckets[int(index)].extend(
                        self.training_subjects[datapoint_ordering[i]])
                else:
                    self.buckets[int(i % self.NUM_BUCKETS)].extend(
                        self.training_subjects[datapoint_ordering[i]])

            with (self.toplevel / self.preprocessed_file).open('wb') as f:
                pickle.dump(
                    (self.buckets, self.longest_vector, self.slice_length,
                     self.testpoints, self.training_subjects), f)

            # numpoints = self.datapoints.size[0]
            # ind = np.arange(numpoints)
            # ind = np.random.choice(ind, replace=False, size=int(0.2*numpoints))
            self.print_folds(self.buckets)

        self.next_leaveout(force=0)

    @property
    @abstractmethod
    def modality_folders(self) -> list:
        """
        Subclasses must implement this so that it reports the name of the
        folder(s) to find experiments, once in the subject folder.

        :return:
        """
        pass

    def files_to_load(self, tests):
        """
        This should be implemented by subclasses to specify what files to load.

        :param tests: The type of tests that should make up the dataset
        :return: A dictionary for the loaded subjects
        :rtype: tuple
        """
        longest_vector = -1
        slice_length = -1
        loaded_subjects = {}
        for subject in tqdm([
                x for x in self.toplevel.iterdir()
                if x.is_dir() and x.name in self.subject_hash.keys()]):
            tqdm.write('Loading subject ' + subject.stem + '...')
            loaded_subjects[subject.stem] = []
            for experiment in tqdm([
                    t for e in self.modality_folders if (subject / e).exists()
                    for t in (subject / e).iterdir() if t.name in tests]):
                for epoch in tqdm([
                        l for l in experiment.iterdir()
                        if l.suffix == self.LOAD_SUFFIX]):
                    try:
                        # f = loadmat(str(epoch), squeeze_me=True)
                        f = self.get_features(tuple([epoch]))

                        if np.isnan(f).any():
                            tqdm.write('NaNs found in ' + str(epoch))
                            time.sleep(1)

                        # slice_length = max(slice_length, len(f['header']))
                        # longest_vector = max(longest_vector,
                        #                      len(f['features'].reshape(-1)))
                        slice_length = max(slice_length, f.shape[1])
                        longest_vector = max(longest_vector,
                                             f.shape[0] * f.shape[1])
                        loaded_subjects[subject.stem].append(
                            (subject.stem, epoch))
                    except Exception as e:
                        tqdm.write(
                            'Warning: Skipping file, error occurred loading: '
                            + str(epoch))

        return loaded_subjects, longest_vector, slice_length

    @property
    @abstractmethod
    def preprocessed_file(self):
        pass

    def next_leaveout(self, force=None):
        """
        Moves on to the next group to leave out.

        :return: Number of which leaveout, `None` if complete
        """
        if force is not None:
            self.leaveout = force

        if self.leaveout == self.NUM_BUCKETS:
            print('Have completed cross-validation')
            self.leaveout = None
            # raise CrossValidationComplete
            return self.leaveout

        # Select next bucket to leave out as evaluation
        self.eval_points = np.array(self.buckets[self.leaveout])

        # Convert the remaining buckets into one list
        self.traindata = np.array([
            item for sublist in self.buckets for item in sublist
            if self.buckets.index(sublist) != self.leaveout])

        self.leaveout += 1

        return self.leaveout

    def current_leaveout(self):
        return self.leaveout

    def sanityset(self, fold=3, batchsize=None, flatten=True):
        """
        Provides a generator for a small subset of data to ensure that the
        model can train to it.

        :return:
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(
            np.array(self.buckets[fold][
                int(0 * len(self.buckets[fold])):int(1 * len(self.buckets[fold]))]),
            self.toplevel, self.longest_vector, self.subject_hash,
            self.DATASET_TARGETS, self.slice_length, self.get_features,
            batchsize=batchsize, flatten=flatten)

    def trainingset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the current training set.

        :param flatten: Whether to flatten the resulting data
        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize
        if self.traindata is None:
            raise AttributeError(
                'No fold initialized... Try calling next_leaveout')

        return self.GENERATOR(
            self.traindata, self.toplevel, self.longest_vector,
            self.subject_hash, self.DATASET_TARGETS, self.slice_length,
            self.get_features, batchsize=batchsize, flatten=flatten)

    def evaluationset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the current evaluation set.

        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(
            self.eval_points, self.toplevel, self.longest_vector,
            self.subject_hash, self.DATASET_TARGETS, self.slice_length,
            self.get_features, batchsize=batchsize, flatten=flatten,
            evaluate=True)

    def testset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the held-out test set.

        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(
            self.testpoints, self.toplevel, self.longest_vector,
            self.subject_hash, self.DATASET_TARGETS, self.slice_length,
            self.get_features, batchsize=batchsize, flatten=flatten,
            evaluate=True)

    def inputshape(self):
        return int(self.longest_vector // self.slice_length), self.slice_length

    def outputshape(self):
        return len(self.DATASET_TARGETS)
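The `__init__` above fills the cross-validation buckets "up and down" so fold sizes stay balanced. A standalone sketch of that serpentine assignment, assuming a dict mapping each subject to its list of datapoints (the helper name is ours, not the original code's):

def serpentine_buckets(subject_points, num_buckets=5):
    # Largest subjects first, alternating fill direction on each pass
    order = sorted(subject_points, key=lambda s: -len(subject_points[s]))
    buckets = [[] for _ in range(num_buckets)]
    for i, subject in enumerate(order):
        if (i // num_buckets) % 2:                        # odd pass: right to left
            index = num_buckets - (i % num_buckets) - 1
        else:                                             # even pass: left to right
            index = i % num_buckets
        buckets[index].extend(subject_points[subject])
    return buckets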
class IsbiJ3Feeder(object):
    def __init__(self, raw_slices, pmap_slices, gt_slices, patch_radius,
                 batch_size, cache_size=5, p_from_cache=0.97):
        self.raw_slices = raw_slices
        self.pmap_slices = pmap_slices
        self.gt_slices = gt_slices
        self.patch_radius = patch_radius
        self.batch_size = batch_size
        self.n_slices = raw_slices.shape[0]
        self.cache = RRCache(maxsize=cache_size)
        self.p_from_cache = p_from_cache

    def __call__(self):
        batch_images = [None] * self.batch_size
        batch_gt = [None] * self.batch_size
        batch_gt_quali = [None] * self.batch_size

        for i in range(self.batch_size):
            # get a patch extractor
            patch_extractor = self.__get_random_slice_data()

            # get a random 0-cell index, but we only
            # consider 0-cells with a size of 3
            n_patches = len(patch_extractor)
            j3_labels = patch_extractor.j3_labels
            assert len(j3_labels) >= 1

            done = False
            while not done:
                rand_index = random.randint(0, len(j3_labels) - 1)
                cell_0_label = j3_labels[rand_index]
                assert cell_0_label >= 1
                cell_0_index = cell_0_label - 1
                try:
                    done = True
                    img, gt, gt_quali = patch_extractor[cell_0_index]
                except Exception:
                    print("hubs....")
                    done = False

            # img shape at the moment: (x, y, c)
            # => desired (1, c, x, y)
            img = numpy.rollaxis(img, 2, 0)[None, ...]
            batch_images[i] = img
            batch_gt[i] = gt
            batch_gt_quali[i] = gt_quali

        batch_images = numpy.concatenate(batch_images, axis=0)
        # print("batch_gt", batch_gt)
        batch_gt = numpy.array(batch_gt)
        batch_gt_quali = numpy.array(batch_gt_quali)

        # batch_images: (batch_size, c, x, y)
        # batch_gt:     (batch_size, 3)
        return batch_images, batch_gt, batch_gt_quali

    def __get_random_slice_data(self):
        take_from_cache = random.random() >= (1.0 - self.p_from_cache)
        if take_from_cache and len(self.cache) > 0:
            # get a random item from the cache via popitem
            # (since this is a random-replacement cache this
            # will yield a random item)
            per_slice_data = self.__get_random_from_cache()
            return per_slice_data
        else:
            # (maybe) compute a new random slice
            slice_index = random.randint(0, self.n_slices - 1)

            # get the per_slice_data from the cache iff already in the cache;
            # iff not in the cache, compute per_slice_data and put it in the cache
            per_slice_data = self.__force_to_cache(slice_index=slice_index)
            return per_slice_data

    def __get_random_from_cache(self):
        assert len(self.cache) > 0
        slice_index, per_slice_data = self.cache.popitem()
        self.cache[slice_index] = per_slice_data
        return per_slice_data

    def __force_to_cache(self, slice_index):
        if slice_index in self.cache:
            per_slice_data = self.cache[slice_index]
            return per_slice_data
        else:
            per_slice_data = self.__compute_per_slice_data(slice_index)
            self.cache[slice_index] = per_slice_data
            return per_slice_data

    def __edge_gt_to_node_gt(self, edge_gt):
        # the edge_gt is on membrane level:
        # 0 at membrane pixels
        # 1 at non-membrane pixels
        seeds = nifty.segmentation.localMaximaSeeds(edge_gt)
        growMap = nifty.filters.gaussianSmoothing(1.0 - edge_gt, 1.0)
        growMap += 0.1 * nifty.filters.gaussianSmoothing(1.0 - edge_gt, 6.0)
        gt = nifty.segmentation.seededWatersheds(growMap, seeds=seeds)
        return gt

    def __compute_per_slice_data(self, slice_index):
        raw_slice = self.raw_slices[slice_index, :, :]
        gt_slice = self.gt_slices[slice_index, :, :]
        pmap_slice = self.pmap_slices[slice_index, :, :]

        edge_gt = self.gt_slices[slice_index, :, :]
        node_gt = self.__edge_gt_to_node_gt(edge_gt)

        # randomized over-segmentation
        threshold = random.uniform(0.275, 0.55)
        overseg = nifty.segmentation.distanceTransformWatersheds(
            pmap_slice.copy(), threshold=threshold)

        hl_cgp = HlCgp(overseg)
        cell_0_patch_extractor = Cell0PatchExtrator(
            hl_cgp, image=raw_slice, node_gt=node_gt, radius=self.patch_radius)

        return cell_0_patch_extractor
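The feeder above uses a small RRCache as a reservoir of expensive per-slice results, reusing a random cached slice with probability `p_from_cache` and recomputing otherwise. A stripped-down sketch of that pattern, with an assumed expensive `compute_fn` standing in for the per-slice extraction:

import random
from cachetools import RRCache

class RandomReuseCache:
    """Reuse a random cached value with probability p, else compute and cache."""

    def __init__(self, compute_fn, n_keys, cache_size=5, p_from_cache=0.97):
        self.compute_fn = compute_fn          # assumed expensive function of a key
        self.n_keys = n_keys
        self.cache = RRCache(maxsize=cache_size)
        self.p_from_cache = p_from_cache

    def get(self):
        if len(self.cache) > 0 and random.random() < self.p_from_cache:
            key, value = self.cache.popitem()  # random item (RR policy)
            self.cache[key] = value            # put it back for later reuse
            return value
        key = random.randint(0, self.n_keys - 1)
        if key not in self.cache:
            self.cache[key] = self.compute_fn(key)
        return self.cache[key]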
def __init__(self, max_dim):
    """
    :param max_dim:
    """
    self.cache = RRCache(maxsize=max_dim)
class CoapRRCache(CoapCache):
    def __init__(self, max_dim):
        """
        :param max_dim:
        """
        self.cache = RRCache(maxsize=max_dim)

    def update(self, key, element):
        """
        :param key:
        :param element:
        :return:
        """
        print("updating cache")
        print("key: ", key.hashkey)
        print("element: ", element)
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """
        :param key:
        :return: CacheElement
        """
        try:
            print("Getting cache response")
            response = self.cache[key.hashkey]
        except KeyError:
            print("problem here")
            response = None
        return response

    def is_full(self):
        """
        :return:
        """
        if self.cache.currsize == self.cache.maxsize:
            return True
        return False

    def is_empty(self):
        """
        :return:
        """
        if self.cache.currsize == 0:
            return True
        return False

    def debug_print(self):
        """
        :return:
        """
        print("size = ", self.cache.currsize)
        items = self.cache.items()
        for key, element in items:
            print("element.max_age ", element.max_age)
            print("element.uri ", element.uri)
            print("element.freshness ", element.freshness)
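A small usage sketch of the cache above; `CacheKey` and `CacheElement` here are hypothetical stand-ins for the CoAPthon objects the real methods receive:

from collections import namedtuple

# Hypothetical stand-ins for the CoAPthon key/element types
CacheKey = namedtuple('CacheKey', ['hashkey'])
CacheElement = namedtuple('CacheElement', ['max_age', 'uri', 'freshness'])

cache = CoapRRCache(max_dim=2)
key = CacheKey(hashkey=('GET', '/sensors/temp'))
cache.update(key, CacheElement(max_age=60, uri='/sensors/temp', freshness=True))

assert not cache.is_empty()
assert cache.get(key) is not None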