def test_delitem_slice():
    """Delete extended slices with positive and negative strides."""
    sorted_values = SortedList(range(100))
    sorted_values._reset(17)
    # The bounds stay fixed; only the stride (and thus the direction) varies.
    for stride in (1, -1, 2, -2):
        del sorted_values[10:40:stride]
def test_delitem():
    """Drain a sorted list by deleting random positions, validating each step."""
    random.seed(0)
    remaining = SortedList(range(100), load=17)
    while len(remaining) > 0:
        victim = random.randrange(len(remaining))
        del remaining[victim]
        # Internal invariants must hold after every single deletion.
        remaining._check()
def score_of_a_vacated_people(self, universo, work='translations'):
    """Return the first-sorted TranslatorScore among the people in *universo*.

    :param universo: container of people to consider; membership is tested
        with ``in`` and its length sizes the SortedList ``load``.
    :param work: key selecting which score of each person is ranked.
    :return: the element at index 0 of the sorted scores (removed via ``pop``).
        Which element sorts first depends on TranslatorScore's ordering,
        which is defined outside this block.
    """
    bucket_size = round(math.sqrt(len(universo)))
    ranking = SortedList(load=bucket_size)
    for person, person_scores in self.__scores__().items():
        if person not in universo:
            continue
        ranking.add(TranslatorScore(person, person_scores[work]))
    return ranking.pop(0)
def iana_rir_gen_ip_list(user_rir_list):
    """Build the merged list of networks assigned to the given RIRs.

    Reads the CSV file named ``iana`` from the current directory. A row is
    selected when any name in *user_rir_list* occurs in its fourth column;
    the first column (with leading zeros stripped) is parsed as the network.

    :param user_rir_list: iterable of RIR names to match.
    :return: the result of ``netaddr.cidr_merge`` over the selected networks.
    """
    # A SortedList keeps the networks ordered as they are inserted, which is
    # what allows cidr_merge to summarize them.
    slash_eights = SortedList()
    with open('iana') as registry:
        for row in csv.reader(registry):
            # One match is enough: a row is added at most once.
            if any(rir in row[3] for rir in user_rir_list):
                slash_eights.add(netaddr.IPNetwork(row[0].lstrip('0')))
    # Summarize adjacent networks into the smallest set of CIDR blocks.
    return netaddr.cidr_merge(slash_eights)
def test_copy_copy():
    """``copy.copy`` must yield a list independent of the original."""
    import copy
    original = SortedList(range(100), load=7)
    duplicate = copy.copy(original)
    # Growing the original must not leak into the copy.
    original.add(100)
    assert len(original) == 101
    assert len(duplicate) == 100
def test_setitem_extended_slice():
    """Extended-slice assignment must match the same operation on a list."""
    target = slice(10, 90, 10)
    sorted_values = SortedList(range(0, 1000, 10), load=17)
    reference = list(range(0, 1000, 10))
    # Apply the identical assignment to the plain list and the sorted list.
    reference[target] = range(105, 905, 100)
    sorted_values[target] = range(105, 905, 100)
    assert sorted_values == reference
    sorted_values._check()
def extract_collocations(self, metric_class):
    """Score every bigram of the language model with the given metric.

    :param metric_class: a subclass of ``Metric``; it is instantiated with no
        arguments and its ``evaluate`` method scores each bigram.
    :return: SortedList of tuples ``(metric value, first-word frequency,
        last-word frequency, bigram frequency, first word, last word)``,
        ordered by descending metric value (the sort key negates element 0).
    """
    assert issubclass(metric_class, Metric)
    scorer = metric_class()
    ranked = SortedList(key=lambda entry: -entry[0])
    unigrams = self.language_model.get_unigrams()
    bigrams = self.language_model.get_bigrams()

    for (first, last), pair_freq in bigrams.items():
        # Each exclusion filter applies only when its flag is switched on.
        if self.exclude_punctuation and (
                first in self.PUNCT or last in self.PUNCT):
            continue
        if self.exclude_conj and (
                first in self.CONJ_RU or last in self.CONJ_RU):
            continue
        if self.exclude_props and (
                first in self.PROPOSITIONS_RU or last in self.PROPOSITIONS_RU):
            continue

        first_freq = unigrams[first]
        last_freq = unigrams[last]
        score = scorer.evaluate(first_freq, last_freq, pair_freq,
                                self.language_model.get_vocab_size())
        ranked.add((score, first_freq, last_freq, pair_freq, first, last))

    return ranked
def test_eq():
    """Equality depends on contents: unequal lengths differ, equal items match."""
    ten = SortedList(range(10), load=4)
    other = SortedList(range(20), load=4)
    # Written as ``not (a == b)`` so that __eq__ itself is exercised.
    assert not (ten == other)
    # Rebuild ``other`` with the same ten values.
    other.clear()
    other.update(range(10))
    assert ten == other
def predict(self, X):
    """Predict a class for every row of *X* by k-nearest-neighbour voting.

    For each test point the ``self.k`` training points with the smallest
    squared Euclidean distance are kept, and the class with the most votes
    among them wins.

    :param X: sequence of test points; each must support ``x - xt`` and
        ``.dot`` against the rows of ``self.X``.
    :return: numpy array of length ``len(X)`` holding the winning class per
        test point. A point with no votes is labelled ``-1``. On a tie the
        class met first while iterating the vote dict wins.
    """
    y = np.zeros(len(X))
    for i, x in enumerate(X):  # test points
        sl = SortedList(load=self.k)  # stores (distance, class) tuples
        for j, xt in enumerate(self.X):  # training points
            diff = x - xt
            d = diff.dot(diff)  # squared distance; the sqrt is not needed to rank
            if len(sl) < self.k:
                # Still filling up: no comparison needed, just add.
                sl.add((d, self.y[j]))
            elif d < sl[-1][0]:
                # Closer than the current farthest neighbour: replace it.
                del sl[-1]
                sl.add((d, self.y[j]))

        # Count one vote per retained neighbour.
        votes = {}
        for _, v in sl:
            votes[v] = votes.get(v, 0) + 1

        max_votes = 0
        max_votes_class = -1
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for v, count in votes.items():
            if count > max_votes:
                max_votes = count
                max_votes_class = v
        y[i] = max_votes_class
    return y
class DijkstraFixedPoint:
    """Work-list search for fixed points over a labelled automaton.

    Candidates are dicts mapping a state to one chosen label. They are kept in
    a SortedList keyed by ``-len(d)``, so the candidates with the fewest
    entries sort last and are the ones removed by ``pop()``.
    """

    def __init__(self, automaton, initial_set, accepted_set):
        """Store the automaton, seed the work list and remember the accepted set.

        :param automaton: object providing ``get_labelled_successors(state)``
            and ``get_successor_labels(state)``.
        :param initial_set: iterable of initial candidate dicts.
        :param accepted_set: container of accepted states (tested with ``in``).
        """
        self.automaton = automaton
        self.set_to_visit = SortedList(initial_set,key= lambda d: -len(d))
        self.accepted_set = accepted_set

    def iter_fix_point_set(self,max_size=10):
        """Process one candidate from the work list.

        :param max_size: size limit on the expanded candidate; once exceeded
            the candidate is abandoned.
        :return: ``(accepted, F)`` where ``F`` is the candidate just popped.
            ``accepted`` is True only if expansion produced no new candidate
            and some successor lies in the accepted set; it is False when the
            size limit was exceeded.
        :raises StopIteration: when the work list is empty.
        """
        if len(self.set_to_visit)==0:
            raise StopIteration()
        F = self.set_to_visit.pop()
        # Start from the current choice: every state has exactly one label.
        nF = {k:[v] for k,v in F.items()}
        new_size_of_fp = len(nF)
        reach_accepted_set = False
        for u,lu in F.items():
            labelled_edges = self.automaton.get_labelled_successors(u)
            # Successors of u reachable through the label chosen for u.
            succ = labelled_edges[lu]
            for s in succ:
                if s in self.accepted_set:
                    reach_accepted_set = True
                if (s not in nF) and (s not in self.accepted_set):
                    # New, non-accepted state: all of its labels become options.
                    nF[s] = list(self.automaton.get_successor_labels(s))
                    new_size_of_fp = len(nF)
                # NOTE(review): confirm the intended nesting level of this
                # size check against the upstream source.
                if new_size_of_fp>max_size:
                    return False,F
        newF = self.expand_successor_set(nF)
        # The candidate itself is not a new candidate.
        if F in newF:
            newF.remove(F)
        self.set_to_visit.update(newF)
        accept_fix_point = (len(newF)==0) and reach_accepted_set
        return accept_fix_point,F

    def expand_successor_set(self,nF):
        """Return every dict obtained by picking one label per state of *nF*.

        :param nF: dict mapping a state to a list of candidate labels.
        :return: list of dicts, one per element of the cartesian product of
            the label lists.
        """
        sF = []
        # import operator
        # size = reduce(operator.mul, [len(v) for v in nF.values()], 1)
        for conf in itertools.product(*nF.values()):
            sF.append({k:v for k,v in zip(nF.keys(),conf)})
        return sF

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """Python 2 iterator step (no ``__next__`` is defined in this class)."""
        return self.iter_fix_point_set()

    def next_fixed_point(self,max_size):
        """Process candidates until one is accepted or the work list runs out.

        :param max_size: forwarded to ``iter_fix_point_set``.
        :return: ``(True, fp)`` for the first accepted candidate, or
            ``(False, None)`` once the work list is exhausted.
        """
        # 0 compares equal to False, so the loop body runs at least once.
        fp_found = 0
        try:
            while fp_found==False:
                fp_found,fp = self.iter_fix_point_set(max_size)
                #print "#"*len(fp)
        except StopIteration:
            return False,None
        return fp_found,fp
def test_pickle():
    """A pickle round trip keeps the contents but not the custom load."""
    import pickle
    source = SortedList(range(10000))
    source._reset(500)
    payload = pickle.dumps(source)
    restored = pickle.loads(payload)
    assert source == restored
    # The original keeps its tuned load; the restored copy reports 1000.
    assert source._load == 500
    assert restored._load == 1000
def find_latest(self):
    """Return the most recent date found under ``self.db_name`` as ``MMDDYYYY``.

    Only entries whose name splits into exactly three ``/``-separated parts
    are considered; the middle part is parsed with the format ``%m%d%Y``.
    Indexing the empty collection fails if no entry qualifies.
    """
    dates = SortedList()
    for entry in self.bucket.list(prefix=self.db_name):
        segments = entry.name.split('/')
        if len(segments) != 3:
            continue
        parsed = datetime.datetime.strptime(segments[1], "%m%d%Y")
        dates.add(parsed.date())
    # The last element of the sorted collection is the latest date.
    return dates[-1].strftime('%m%d%Y')
class InMemoryBackend(object):
    """
    The backend that keeps the results in the memory.

    Results are held twice: in a dict keyed by identifier, and in a
    SortedList ordered by the parsed ``timestamp`` field of each result.
    """

    def __init__(self, *args, **kwargs):
        """Create an empty backend; positional and keyword arguments are ignored."""
        def get_timestamp(result):
            return timestamp_parser.parse(result['timestamp'])

        self._results = dict()
        self._sorted = SortedList(key=get_timestamp)

    def disconnect(self):
        """Nothing to release; return an already-fired Deferred."""
        return succeed(None)

    def store(self, result):
        """
        Store a single benchmarking result and return its identifier.

        :param dict result: The result in the JSON compatible format.
        :return: A Deferred that produces an identifier for the stored result.
        """
        result_id = uuid4().hex
        # Insert into the sorted index first: computing the sort key may raise
        # (missing or unparsable timestamp), and in that case the identifier
        # map must not be left holding an entry the index knows nothing about.
        self._sorted.add(result)
        self._results[result_id] = result
        return succeed(result_id)

    def retrieve(self, id):
        """
        Retrieve a result by the given identifier.

        :return: A Deferred firing with the result, or failing with
            ``ResultNotFound`` when the identifier is unknown.
        """
        try:
            return succeed(self._results[id])
        except KeyError:
            return fail(ResultNotFound(id))

    def query(self, filter, limit=None):
        """
        Return matching results.

        :param dict filter: Every key/value pair must be present in a result
            for it to match.
        :param limit: Maximum number of results to return; ``None`` means
            no limit.
        :return: A Deferred firing with the list of matches, ordered from the
            latest timestamp to the earliest.
        """
        matching = []
        for result in reversed(self._sorted):
            if len(matching) == limit:
                break
            # Portable subset test (dict.viewitems() exists only on Python 2).
            if all(key in result and result[key] == value
                   for key, value in filter.items()):
                matching.append(result)
        return succeed(matching)

    def delete(self, id):
        """
        Delete a result by the given identifier.

        :return: A Deferred firing with ``None``, or failing with
            ``ResultNotFound`` when the identifier is unknown.
        """
        try:
            result = self._results.pop(id)
            self._sorted.remove(result)
            return succeed(None)
        except KeyError:
            return fail(ResultNotFound(id))
def read_rirs(country_list, permit, rir_list=RIR_NAMES):
    """Collect and summarize the IPv4 networks listed in the RIR files.

    Each file is read as ``|``-separated records; field 1 is the country
    code, field 2 the record type, field 3 the network address and field 4
    the number of addresses.

    :param country_list: container of country codes.
    :param permit: when true, keep networks whose country is NOT in
        *country_list*; when false, keep those whose country IS in it.
    :param rir_list: iterable of paths of the previously downloaded files.
    :return: the result of ``netaddr.cidr_merge`` over the selected networks.
    """
    # A SortedList keeps elements ordered as they are inserted, which is what
    # allows cidr_merge to work.
    rir_ips = SortedList()

    for rir in rir_list:
        # One file at a time, closed even if parsing raises.
        with open(rir) as rir_file:
            for line in rir_file:
                curr_line = line.split('|')
                try:
                    # Only ipv4 records that belong to a specific country,
                    # and only the countries selected by ``permit``.
                    if (curr_line[2] == "ipv4" and curr_line[1] != "*") and \
                            ((permit and curr_line[1] not in country_list) or
                             (not permit and curr_line[1] in country_list)):
                        network_id = curr_line[3]
                        # Address count minus one, used as a host mask.
                        wildcard = int(curr_line[4]) - 1
                        try:
                            # AddrFormatError is raised when the count was not
                            # a power of 2 (the host mask is then invalid).
                            rir_ips.add(netaddr.IPNetwork(
                                network_id + "/" + str(netaddr.IPAddress(wildcard))))
                        except netaddr.AddrFormatError:
                            # Invalid mask: round UP to the next power of 2.
                            print("rounded network " + network_id + " with " +
                                  str(wildcard) +
                                  " hosts up to nearest power of 2")
                            wildcard = next_power_of_2(wildcard) - 1
                            print(wildcard + 1)
                            rir_ips.add(netaddr.IPNetwork(
                                network_id + "/" + str(netaddr.IPAddress(wildcard))))
                # IndexErrors only occur when parsing lines lacking the
                # columns we need (e.g. headers), so those are skipped.
                except IndexError:
                    pass

    # cidr_merge summarizes subnets where possible, which greatly decreases
    # the number of ACL entries.
    rir_ips = netaddr.cidr_merge(rir_ips)
    return rir_ips
def test_op_add():
    """``+`` and ``+=`` must agree with repetition via ``*``."""
    base = SortedList(range(10))
    base._reset(4)
    assert (base + base + base) == (base * 3)

    grown = SortedList(range(10))
    grown._reset(4)
    # Two in-place self-additions double the list twice.
    for _ in range(2):
        grown += grown
    assert grown == (base * 4)
def dir(self, file_pattern):
    """List the remote files whose name matches *file_pattern*.

    :param file_pattern: regular expression applied with ``re.match`` (so it
        is anchored at the start of the file name).
    :return: SortedList of ``RemoteFile(filename, st_mtime)`` objects.
    """
    entries = self.sftp.listdir_attr(self.remote_dir)
    matches = SortedList()
    for entry in entries:
        # Entries without a file name cannot be matched.
        if not hasattr(entry, "filename"):
            continue
        name = entry.filename
        if re.match(file_pattern, name):
            matches.add(RemoteFile(name, entry.st_mtime))
    return matches
def test_setitem():
    """Item assignment in random order must keep the list valid."""
    random.seed(0)
    sorted_values = SortedList(range(0, 100, 10), load=4)
    # (index, new value) pairs: position i receives 10 * i + 5.
    assignments = [(index, 10 * index + 5) for index in range(10)]
    random.shuffle(assignments)
    for index, value in assignments:
        sorted_values[index] = value
    sorted_values[-2] = 85
    sorted_values._check()
def test_bisect_left():
    """``bisect_left`` on an empty list and on a list holding duplicates."""
    empty = SortedList()
    assert empty.bisect_left(0) == 0

    # Every value in 0..99 ends up in the list twice.
    doubled = SortedList(range(100), load=17)
    doubled.update(range(100))
    doubled._check()
    for probe, expected in ((50, 100), (200, 200)):
        assert doubled.bisect_left(probe) == expected
def test_bisect_right():
    """``bisect_right`` on an empty list and on a list holding duplicates."""
    empty = SortedList()
    assert empty.bisect_right(10) == 0

    # Every value in 0..99 ends up in the list twice.
    doubled = SortedList(range(100), load=17)
    doubled.update(range(100))
    doubled._check()
    for probe, expected in ((10, 22), (200, 200)):
        assert doubled.bisect_right(probe) == expected
def test_contains():
    """Membership: empty list, every stored value, and one value past the end."""
    sorted_values = SortedList()
    assert 0 not in sorted_values

    upper = 10000
    sorted_values.update(range(upper))
    assert all(value in sorted_values for value in range(upper))
    assert upper not in sorted_values
    sorted_values._check()
def arrayRDP(arr, epsilon=0.0, n=None):
    """
    This is a slightly modified version of the _aRDP function that accepts as
    arguments both the tolerance in the distance and the maximum number of
    points the algorithm can select.

    **Note:** If the *n* parameter is not specified, this function delegates
    directly to _aRDP and returns its result.

    Parameters
    ----------
    arr:
        Array of values of consecutive points.
    epsilon:
        Maximum difference allowed in the simplification process. Must be
        strictly positive whenever *n* is given.
    n:
        Maximum number of points of the resulting simplified array. A value
        of 0 is replaced by ``len(arr)``.

    Returns
    -------
    out:
        Array of indices of the selected points.
        NOTE(review): when ``n < 3`` the input *arr* itself is returned rather
        than an array of indices -- confirm this is intended.

    Raises
    ------
    ValueError
        If *n* is given and ``epsilon <= 0.0``.
    """
    if n is None:
        return _aRDP(arr, epsilon)
    if epsilon <= 0.0:
        raise ValueError('Epsilon must be > 0.0')
    n = n or len(arr)
    if n < 3:
        return arr
    # Maps (-distance, index of the farthest point) -> (first, last) bounds of
    # a fragment of the array.
    fragments = SortedDict()
    # The distances are stored as negative values because of the default
    # order of the SortedDict: the fragment with the largest distance then
    # comes first.
    dist, idx = max_vdist(arr, 0, len(arr) - 1)
    fragments[(-dist, idx)] = (0, len(arr) - 1)
    while len(fragments) < n-1:
        # Take the first item in key order.
        (dist, idx), (first, last) = fragments.popitem(last=False)
        if -dist <= epsilon:
            # Within tolerance: put the item back so it is not lost, and stop.
            fragments[(dist, idx)] = (first, last)
            break
        else:
            # Split the fragment at the selected index into two fragments.
            dist, newidx = max_vdist(arr, first, idx)
            fragments[(-dist, newidx)] = (first, idx)
            dist, newidx = max_vdist(arr, idx, last)
            fragments[(-dist, newidx)] = (idx, last)
    # Collect, in order, the start index of every fragment, then append the
    # index of the last point of the array.
    result = SortedList(i[0] for i in fragments.itervalues())
    result.add(len(arr) - 1)
    return np.array(result)
def test_delete():
    """Removing every value one by one must leave empty internals."""
    sorted_values = SortedList(range(20))
    sorted_values._reset(4)
    sorted_values._check()
    for value in range(20):
        sorted_values.remove(value)
        # Invariants must hold after each removal.
        sorted_values._check()
    assert len(sorted_values) == 0
    # Both internal structures must be completely emptied.
    assert sorted_values._maxes == []
    assert sorted_values._lists == []
def test_update():
    """``update`` on an empty list and again on an already populated one."""
    sorted_values = SortedList()
    sorted_values.update(range(1000))
    assert all(
        actual == expected
        for actual, expected in zip(sorted_values, list(range(1000)))
    )
    assert len(sorted_values) == 1000
    sorted_values._check()

    # A second, larger update adds to the existing contents.
    sorted_values.update(range(10000))
    assert len(sorted_values) == 11000
    sorted_values._check()
def test_delitem():
    """Delete by random index until empty, then delete with a full slice."""
    random.seed(0)
    remaining = SortedList(range(100))
    remaining._reset(17)
    while len(remaining) > 0:
        victim = random.randrange(len(remaining))
        del remaining[victim]
        remaining._check()

    # A full-slice deletion must empty a fresh list in one step.
    fresh = SortedList(range(100))
    fresh._reset(17)
    del fresh[:]
    assert len(fresh) == 0
    fresh._check()
def test_count():
    """``count`` must report how many times each value was added."""
    sorted_values = SortedList()
    sorted_values._reset(7)
    assert sorted_values.count(0) == 0

    # Add every value exactly as many times as its own magnitude.
    for value in range(100):
        for _ in range(value):
            sorted_values.add(value)
    sorted_values._check()

    for value in range(100):
        assert sorted_values.count(value) == value
    # A value that was never added is counted zero times.
    assert sorted_values.count(100) == 0
def test_getitem():
    """Positive and negative indexing must agree with a sorted plain list."""
    random.seed(0)
    sorted_values = SortedList(load=17)
    reference = []
    for _ in range(100):
        sample = random.random()
        sorted_values.add(sample)
        reference.append(sample)
    reference.sort()

    # Indices 0..99.
    assert all(sorted_values[i] == reference[i] for i in range(100))
    # Indices -99..0, covering negative indexing.
    assert all(sorted_values[i - 99] == reference[i - 99] for i in range(100))
def get_3_most_ambiguous(self, X, Y):
    """Return the three samples whose predicted probability is closest to 0.5.

    :param X: samples; passed to ``self.predict_proba`` and indexed with a
        list of positions (assumes an array supporting that indexing).
    :param Y: targets, indexed the same way as *X*.
    :return: tuple ``(X[indexes], Y[indexes])`` ordered from the most to the
        least ambiguous of the selected samples.
    """
    P = self.predict_proba(X)
    N = len(X)
    sl = SortedList(load=3)  # stores (distance, sample index) tuples
    # range() exists on both Python 2 and 3 (xrange is Python 2 only).
    for n in range(N):
        p = P[n]
        # Distance from the decision boundary; presumably P[n] is a scalar.
        dist = np.abs(p - 0.5)
        if len(sl) < 3:
            sl.add((dist, n))
        elif dist < sl[-1][0]:
            # More ambiguous than the worst kept sample: replace it.
            del sl[-1]
            sl.add((dist, n))
    indexes = [v for k, v in sl]
    return X[indexes], Y[indexes]
def __init__(self, *args, **kwargs):
    """
    Create a PriorityDict.

    A PriorityDict offers the same methods as a dict and, in addition,
    efficiently keeps its keys in value-sorted order. As a result, the keys
    method returns the keys sorted by value, the popitem method removes the
    item with the highest value, and so on.

    An optional *iterable* supplies an initial series of items. As with
    collections.Counter, items from the iterable are counted. Keyword
    arguments, if given, are added as items using the keyword as key and its
    value as value. When a key appears both in the positional argument and as
    a keyword, the value given by the keyword is the one retained.

    For example, each of these produces a dictionary equal to
    ``{"one": 2, "two": 3}``:

    * ``PriorityDict(one=2, two=3)``
    * ``PriorityDict({'one': 2, 'two': 3})``
    * ``PriorityDict(['one', 'two', 'one', 'two', 'two'])``

    The first form only works for keys that are valid Python identifiers;
    the other forms accept any valid keys.
    """
    # Backing stores are created before anything else.
    self._dict = {}
    self._list = SortedList()
    self.iloc = _IlocWrapper(self)
    # Populate last, once all internal state exists.
    self.update(*args, **kwargs)
def test_remove():
    """``discard`` on an empty list is a no-op; ``remove`` drops one duplicate."""
    empty = SortedList()
    assert empty.discard(0) is None
    assert len(empty) == 0
    empty._check()

    with_duplicates = SortedList([1, 2, 2, 2, 3, 3, 5], load=4)
    with_duplicates.remove(2)
    with_duplicates._check()
    # Exactly one of the three 2s must be gone.
    assert all(
        actual == expected
        for actual, expected in zip(with_duplicates, [1, 2, 2, 3, 3, 5])
    )
def test_setitem_slice():
    """Run a sequence of slice assignments, checking the contents after each."""
    sorted_values = SortedList(range(100), load=17)
    # (target slice, replacement values, expected contents afterwards).
    # The steps are applied in order; each one builds on the previous state.
    steps = [
        (slice(None, 10), iter(range(10)), list(range(100))),
        (slice(None, 10, 2), iter(val * 2 for val in range(5)),
         list(range(100))),
        (slice(None, 50), range(-50, 50), list(range(-50, 100))),
        (slice(None, 100), range(50), list(range(100))),
        (slice(None), range(100), list(range(100))),
        (slice(90, None), [], list(range(90))),
        (slice(None, 10), [], list(range(10, 90))),
    ]
    for target, replacement, expected in steps:
        sorted_values[target] = replacement
        assert sorted_values == expected
    sorted_values._check()
def test_pickle():
    """A pickle round trip must keep both the contents and the load."""
    import pickle
    source = SortedList(range(10000), load=500)
    payload = pickle.dumps(source)
    restored = pickle.loads(payload)
    assert source == restored
    assert source._load == restored._load
def test_pop():
    """``pop`` with no index, index 0, a negative index and a middle index."""
    sorted_values = SortedList(range(10), load=4)
    sorted_values._check()
    # (arguments passed to pop, value expected back); applied in sequence.
    cases = (((), 9), ((0,), 0), ((-2,), 7), ((4,), 5))
    for pop_args, expected in cases:
        assert sorted_values.pop(*pop_args) == expected
        sorted_values._check()
def test_check():
    """Run ``_check`` on a list whose cached length was deliberately corrupted.

    NOTE(review): presumably ``_check`` is expected to fail here; that
    expectation is not expressed inside this function.
    """
    corrupted = SortedList(range(10), load=4)
    # Ten items are stored, but the cached length now claims five.
    corrupted._len = 5
    corrupted._check()
def test_irange():
    """Check ``irange`` over every bound pair and inclusivity combination."""
    size = 53
    sorted_values = SortedList(load=7)
    assert [] == list(sorted_values.irange())

    values = list(range(size))
    sorted_values.update(values)

    # Every (start, end) pair with start <= end, in the original loop order.
    bounds = [(lo, hi) for lo in range(size) for hi in range(lo, size)]

    # Default: both ends inclusive, forward and reversed.
    for lo, hi in bounds:
        expected = values[lo:(hi + 1)]
        assert list(sorted_values.irange(lo, hi)) == expected
        assert list(sorted_values.irange(lo, hi, reverse=True)) == expected[::-1]

    # Inclusive start, exclusive end.
    for lo, hi in bounds:
        assert list(range(lo, hi)) == list(
            sorted_values.irange(lo, hi, (True, False)))

    # Exclusive start, inclusive end.
    for lo, hi in bounds:
        assert list(range(lo + 1, hi + 1)) == list(
            sorted_values.irange(lo, hi, (False, True)))

    # Both ends exclusive.
    for lo, hi in bounds:
        assert list(range(lo + 1, hi)) == list(
            sorted_values.irange(lo, hi, (False, False)))

    # Open-ended ranges: only a minimum, then only an (exclusive) maximum.
    for lo in range(size):
        assert list(range(lo, size)) == list(sorted_values.irange(lo))
    for hi in range(size):
        assert list(range(0, hi)) == list(
            sorted_values.irange(None, hi, (True, False)))

    # No bounds at all, and bounds lying just outside the stored values.
    assert values == list(sorted_values.irange(inclusive=(False, False)))
    assert [] == list(sorted_values.irange(size))
    assert values == list(sorted_values.irange(None, size, (True, False)))
def test_copy():
    """The ``copy`` method must yield a list independent of the original."""
    original = SortedList(range(100), load=7)
    duplicate = original.copy()
    # Growing the original must not leak into the copy.
    original.add(100)
    assert len(original) == 101
    assert len(duplicate) == 100
def __init__(self, spec_file: str = '',
             spec_dict: Optional[Mapping[str, Any]] = None,
             load: bool = False, use_time_stamp: bool = True,
             init_buffer_path=None, **kwargs) -> None:
    """Read the specification and set up every attribute of the search.

    :param spec_file: path of a YAML specification; takes precedence over
        *spec_dict* when non-empty.
    :param spec_dict: specification mapping used when *spec_file* is empty.
    :param load: when true, the work directory is the folder containing
        *spec_file*; otherwise a new one is derived from ``root_dir`` and the
        specification is written there as ``params.yaml``.
    :param use_time_stamp: when true (and not loading), the work directory
        name is built from the current time plus the prefix and suffix.
    :param init_buffer_path: stored unchanged as ``init_buffer_paths``.
    :param kwargs: accepted but not used here.
    :raises ValueError: if a visited-state model is requested without
        ``nepoch_visited``, or if ``eval_fn`` is not a registered function.
    """
    LoggingBase.__init__(self)

    specs = read_yaml(spec_file) if spec_file else spec_dict
    self.specs = specs
    params = specs['params']

    if load:
        self.work_dir = Path(spec_file).parent
    else:
        suffix = params.get('suffix', '')
        prefix = params.get('prefix', '')
        if use_time_stamp:
            stamp = time.strftime('%Y%m%d%H%M%S')
            unique_name = get_full_name(stamp, prefix, suffix)
        else:
            # Join whichever of prefix / suffix is non-empty with '_'.
            unique_name = '_'.join(
                f'{part}' for part in (prefix, suffix) if part)
        self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
        write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    self.load = load
    self.seed = params.get('seed', 10)

    # Mandatory settings (a missing key raises KeyError).
    self.ndim = params['ndim']
    self.bsize = params['batch_size']
    self.hiddens = params['hidden_list']
    self.niter = params['niter']
    self.goal = params['goal_value']
    self.mode = params['mode']
    self.viz_rate = self.niter // 10
    self.lr = params['lr']
    self.nepochs = params['nepochs']
    self.nsamples = params['nsamples']
    self.n_init_samples = params['n_init_samples']
    self.init_nepochs = params['init_nepochs']
    self.cut_off = params['cut_off']
    self.beta = params['beta']
    self.nr_mix = params['nr_mix']
    self.base_fn = params['base_fn']
    self.only_pos = params['only_positive']
    # whether to run 1000 epochs of training for the later round of iteration
    self.full_training = params['full_training_last']
    self.input_scale = params['input_scale']

    # Optional settings with defaults.
    self.fixed_sigma = params.get('fixed_sigma')
    self.on_policy = params.get('on_policy', False)
    self.problem_type = params.get('problem_type', 'csp')
    # On-policy runs always allow repeated samples.
    self.allow_repeated = self.on_policy or params.get('allow_repeated', False)
    self.important_sampling = params.get('important_sampling', False)

    # Settings for the optional model of visited states.
    self.visited_dist: Optional[nn.Module] = None
    self.visited_fixed_sigma = params.get('visited_fixed_sigma')
    self.visited_nr_mix = params.get('visited_nr_mix')
    self.explore_coeff = params.get('explore_coeff')
    self.nepoch_visited = params.get('nepoch_visited', -1)
    self.normalize_weight = params.get('normalize_weight', True)
    self.add_ent_before_norm = params.get(
        'add_entropy_before_normalization', False)
    self.weight_type = params.get('weight_type', 'ind')

    self.model_visited = (self.explore_coeff is not None
                          or self.important_sampling)
    if self.model_visited and self.nepoch_visited == -1:
        raise ValueError('nepoch_visited should be specified when a model is '
                         'learning visited states')

    self.init_buffer_paths = init_buffer_path

    eval_fn = params['eval_fn']
    try:
        self.fn = registered_functions[eval_fn]
    except KeyError:
        raise ValueError(f'{eval_fn} is not a valid benchmark function')

    # Prefer the GPU when one is available.
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {self.device}')
    self.cpu = torch.device('cpu')

    self.model: Optional[nn.Module] = None
    self.buffer = None
    self.opt = None

    # hacky version of passing input vectors around
    self.input_vectors_norm = [
        np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
        for _ in range(self.ndim)
    ]
    self.input_vectors = [
        self.input_scale * vec for vec in self.input_vectors_norm
    ]
    # TODO: remove this hacky way of keeping track of delta
    first_axis = self.input_vectors_norm[0]
    self.delta = first_axis[-1] - first_axis[-2]

    # keep track of lo and hi for indices
    self.params_min = np.array([0] * self.ndim)
    self.params_max = np.array([len(vec) - 1 for vec in self.input_vectors])

    self.fvals = SortedList()
class AutoRegSearch(LoggingBase):
    """Search loop that fits a MADE mixture model over a discretised grid.

    The model is repeatedly sampled, the samples are evaluated with the
    registered benchmark function, stored in a ``CacheBuffer`` and used to
    re-train the model. Optionally a second MADE (``visited_dist``) is
    trained on the visited samples and used for importance sampling and/or
    an exploration penalty.
    """

    # noinspection PyUnusedLocal
    def __init__(self,
                 spec_file: str = '',
                 spec_dict: Optional[Mapping[str, Any]] = None,
                 load: bool = False,
                 use_time_stamp: bool = True,
                 init_buffer_path=None,
                 **kwargs) -> None:
        """Read the experiment specification and initialise the search state.

        Parameters
        ----------
        spec_file:
            Path of a YAML specification file. Takes precedence over
            ``spec_dict`` when non-empty.
        spec_dict:
            Already-parsed specification; used only when ``spec_file`` is
            empty.
        load:
            When True the working directory is the parent directory of
            ``spec_file`` and nothing is written; otherwise a new working
            directory name is derived and the specification is written to
            ``params.yaml`` inside it.
        use_time_stamp:
            When True the working directory name starts with a time stamp.
        init_buffer_path:
            Stored as ``self.init_buffer_paths`` (an iterable of paths, or
            None); consumed by ``setup_model``.
        **kwargs:
            Accepted and ignored.

        Raises
        ------
        ValueError
            If neither ``spec_file`` nor ``spec_dict`` is provided, if the
            visited-state model is enabled without ``nepoch_visited``, or if
            ``eval_fn`` is not a registered benchmark function.
        """
        LoggingBase.__init__(self)

        if spec_file:
            specs = read_yaml(spec_file)
        else:
            specs = spec_dict
        if specs is None:
            # Fail early with a clear message instead of the opaque TypeError
            # that subscripting None would raise below.
            raise ValueError('either spec_file or spec_dict must be provided')

        self.specs = specs
        params = specs['params']

        if load:
            self.work_dir = Path(spec_file).parent
        else:
            suffix = params.get('suffix', '')
            prefix = params.get('prefix', '')
            if use_time_stamp:
                unique_name = time.strftime('%Y%m%d%H%M%S')
                unique_name = get_full_name(unique_name, prefix, suffix)
            else:
                unique_name = f'{prefix}' if prefix else ''
                if suffix:
                    unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'
            self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
            write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

        self.load = load
        self.seed = params.get('seed', 10)
        self.ndim = params['ndim']
        self.bsize = params['batch_size']
        self.hiddens = params['hidden_list']
        self.niter = params['niter']
        self.goal = params['goal_value']
        self.mode = params['mode']
        self.viz_rate = self.niter // 10
        self.lr = params['lr']
        self.nepochs = params['nepochs']
        self.nsamples = params['nsamples']
        self.n_init_samples = params['n_init_samples']
        self.init_nepochs = params['init_nepochs']
        self.cut_off = params['cut_off']
        self.beta = params['beta']
        self.nr_mix = params['nr_mix']
        self.base_fn = params['base_fn']
        self.only_pos = params['only_positive']
        # whether to run 1000 epochs of training for the later round of iteration
        self.full_training = params['full_training_last']
        self.input_scale = params['input_scale']
        self.fixed_sigma = params.get('fixed_sigma', None)
        self.on_policy = params.get('on_policy', False)
        self.problem_type = params.get('problem_type', 'csp')
        self.allow_repeated = params.get('allow_repeated', False)
        # on-policy training always allows repeated samples
        self.allow_repeated = self.on_policy or self.allow_repeated
        self.important_sampling = params.get('important_sampling', False)
        self.visited_dist: Optional[nn.Module] = None
        self.visited_fixed_sigma = params.get('visited_fixed_sigma', None)
        self.visited_nr_mix = params.get('visited_nr_mix', None)
        self.explore_coeff = params.get('explore_coeff', None)
        self.nepoch_visited = params.get('nepoch_visited', -1)
        self.normalize_weight = params.get('normalize_weight', True)
        self.add_ent_before_norm = params.get(
            'add_entropy_before_normalization', False)
        self.weight_type = params.get('weight_type', 'ind')

        # a second model over visited states is needed for either feature
        self.model_visited = self.explore_coeff is not None or self.important_sampling
        if self.model_visited and self.nepoch_visited == -1:
            raise ValueError(
                'nepoch_visited should be specified when a model is '
                'learning visited states')

        self.init_buffer_paths = init_buffer_path

        eval_fn = params['eval_fn']
        try:
            self.fn = registered_functions[eval_fn]
        except KeyError:
            raise ValueError(f'{eval_fn} is not a valid benchmark function')

        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        print(f'device: {self.device}')
        self.cpu = torch.device('cpu')
        self.model: Optional[nn.Module] = None
        self.buffer = None
        self.opt = None

        # hacky version of passing input vectors around:
        # every dimension gets the same 100-point grid on [-1, 1]
        self.input_vectors_norm = [
            np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
            for _ in range(self.ndim)
        ]
        self.input_vectors = [
            self.input_scale * vec for vec in self.input_vectors_norm
        ]
        # TODO: remove this hacky way of keeping track of delta
        # (grid spacing, taken from the last two points of the first dimension)
        self.delta = self.input_vectors_norm[0][-1] - self.input_vectors_norm[
            0][-2]
        # keep track of lo and hi for indices
        self.params_min = np.array([0] * self.ndim)
        self.params_max = np.array([len(x) - 1 for x in self.input_vectors])

        self.fvals = SortedList()

    @classmethod
    def set_seed(cls, seed):
        """Seed the ``random``, NumPy and PyTorch (CPU and CUDA) generators."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        # noinspection PyUnresolvedReferences
        torch.cuda.manual_seed_all(seed)

    def get_probs(self, xin: torch.Tensor, model: nn.Module, debug=False):
        """Compute the per-dimension probability of each input value.

        ``xin`` is expected to hold values normalised to [-1, 1] on a grid
        with spacing ``self.delta``. The model output is interpreted as
        mixture coefficients, centres and (unless a fixed sigma is
        configured for ``model``) log-widths. The probability of a grid
        value is the mixture CDF mass of the cell of width ``delta`` around
        it; the two edge cells extend to -inf / +inf.

        Parameters
        ----------
        xin:
            Input tensor with one row per sample and ``self.ndim`` columns.
        model:
            Either ``self.model`` or the visited-state model; this selects
            which fixed-sigma setting applies.
        debug:
            When True, drops into ``pdb`` right before returning.

        Returns
        -------
        torch.Tensor
            Mixture probabilities summed over the mixture axis, i.e. one
            value per input row and dimension.

        Raises
        ------
        ValueError
            If ``self.base_fn`` is unsupported, or is ``'uniform'`` while
            ``self.fixed_sigma`` is set.
        """
        delta = self.delta
        xin = xin.to(self.device)
        xhat = model(xin)
        dim = self.ndim

        if model is self.model:
            nparams_per_dim_mix = 2 if self.fixed_sigma else 3
            sigma_fixed = self.fixed_sigma is not None
        else:
            nparams_per_dim_mix = 2 if self.visited_fixed_sigma else 3
            sigma_fixed = self.visited_fixed_sigma is not None

        coeffs = torch.stack(
            [xhat[..., i::dim * nparams_per_dim_mix] for i in range(dim)],
            dim=-2)
        # Note: coeffs was previously interpreted as log_coeffs
        # interpreting outputs of NN as log is dangerous, can result in NaNs.
        # solution: they should be positive and add up to 1 -> softmax
        coeffs_norm = coeffs.softmax(dim=-1)

        eps = 1e-15
        # broadcast the inputs along the mixture axis
        xb = xin[..., None] + torch.zeros(coeffs.shape, device=self.device)

        if self.base_fn in ['logistic', 'normal']:
            means = torch.stack([
                xhat[..., i + dim::dim * nparams_per_dim_mix]
                for i in range(dim)
            ], dim=-2)
            if sigma_fixed:
                sigma = self.fixed_sigma if model is self.model else self.visited_fixed_sigma
            else:
                log_sigma = torch.stack(
                    [xhat[..., i + 2 * dim::dim * 3] for i in range(dim)],
                    dim=-2)
                # put a cap on the value of output so that it does not blow up
                log_sigma = torch.min(
                    log_sigma,
                    torch.ones(log_sigma.shape).to(self.device) * 50)
                # put a bottom on the value of output so that it does not
                # diminish and become zero
                log_sigma = torch.max(
                    log_sigma,
                    torch.ones(log_sigma.shape).to(self.device) * (-40))
                sigma = log_sigma.exp()

            if self.base_fn == 'logistic':
                plus_cdf = cdf_logistic(xb + delta / 2, means, sigma)
                minus_cdf = cdf_logistic(xb - delta / 2, means, sigma)
            else:
                plus_cdf = cdf_normal(xb + delta / 2, means, sigma)
                minus_cdf = cdf_normal(xb - delta / 2, means, sigma)
        elif self.base_fn == 'uniform':
            # does not work with self.fixed_sigma
            if self.fixed_sigma:
                raise ValueError(
                    'base_fn cannot be uniform when fixed_sigma is given!')
            center = torch.stack(
                [xhat[..., i + dim::dim * 3] for i in range(dim)], dim=-2)
            # normalize center between [-1,1] to cover all the space
            center = 2 * (center - center.min()) / (center.max() -
                                                    center.min() + eps) - 1
            log_delta = torch.stack(
                [xhat[..., i + 2 * dim::dim * 3] for i in range(dim)], dim=-2)
            # put a cap on the value of output so that it does not blow up.
            # clamp keeps the computation on log_delta's own device; the
            # previous CPU-allocated cap tensor failed on CUDA.
            log_delta = torch.clamp(log_delta, max=50)
            bdelta = log_delta.exp()
            a = center - bdelta / 2
            b = center + bdelta / 2
            plus_cdf = cdf_uniform(xb + delta / 2, a, b)
            minus_cdf = cdf_uniform(xb - delta / 2, a, b)
        else:
            raise ValueError(f'unsupported base_fn = {self.base_fn}')

        # -1 is mapped to (-inf, -1+d/2], 1 is mapped to [1-d/2, inf),
        # and other 'i's are mapped to [i-d/2, i+d/2)
        probs_nonedge = plus_cdf - minus_cdf
        probs_right_edge = 1 - minus_cdf
        probs_left_edge = plus_cdf

        l_cond = xb <= (-1 + delta / 2)
        r_cond = xb >= (1 - delta / 2)
        n_cond = ~(l_cond | r_cond)
        cdfs = probs_left_edge * l_cond + probs_right_edge * r_cond + probs_nonedge * n_cond

        probs = (coeffs_norm * cdfs).sum(-1)

        if debug:
            pdb.set_trace()

        return probs

    def get_nll(self,
                xin: torch.Tensor,
                model: nn.Module,
                weights=None,
                debug=False):
        """Compute the training objective for a batch of inputs.

        Without ``weights`` this is the average negative log-likelihood of
        ``xin`` under ``model``. With ``weights`` the log-likelihood of each
        sample is scaled by its negated weight plus an entropy term
        (``self.beta``), optionally multiplied by an importance-sampling
        coefficient, and averaged separately over the positive-weight and
        the non-positive-weight samples.

        NOTE: drops into ``pdb`` when the objective is NaN or ``debug`` is
        set.
        """
        probs = self.get_probs(xin, model=model)
        eps_tens = 1e-15
        logp_vec = (probs + eps_tens).log().sum(-1)

        if weights is None:
            min_obj = -logp_vec.mean(-1)
        else:
            pos_ind = (weights > 0).float()
            neg_ind = 1 - pos_ind

            # obj_term = - self.buffer.size * (weights * prob_x).data
            # ent_term = self.buffer.size * (self.beta * (torch.tensor(1) + logp_vec)).data
            obj_term = -weights.data
            # TODO: Fix this bad code
            if self.add_ent_before_norm:
                # the entropy term was already folded into the weights
                ent_term = 0
            else:
                ent_term = (self.beta * (1 + logp_vec)).data

            # important sampling coefficient (with frozen gradient)
            if self.important_sampling:
                probs_visited = self.get_probs(xin, model=self.visited_dist)
                logp_visited = (probs_visited +
                                eps_tens).log().sum(-1).to(logp_vec)
                # is_coeff = (torch.tensor(10**(-self.ndim * 2)).log() - logp_visited).exp()
                is_coeff = torch.clamp((logp_vec - logp_visited).exp(), 1e-15,
                                       1e15).data
                is_coeff = is_coeff / is_coeff.max()
            else:
                is_coeff = 1

            main_obj = obj_term * logp_vec * is_coeff
            ent_obj = 1 / self.ndim * ent_term * logp_vec * is_coeff

            # avoid division by zero when a group is empty
            npos = pos_ind.sum(-1)
            npos = 1 if npos == 0 else npos
            pos_main_obj = (main_obj * pos_ind).sum(-1) / npos
            pos_ent_obj = (ent_obj * pos_ind).sum(-1) / npos

            nneg = neg_ind.sum(-1)
            nneg = 1 if nneg == 0 else nneg
            neg_main_obj = (main_obj * neg_ind).sum(-1) / nneg
            neg_ent_obj = (ent_obj * neg_ind).sum(-1) / nneg

            if self.only_pos:
                min_obj = (pos_main_obj + pos_ent_obj) / self.ndim
            else:
                min_obj = (pos_main_obj + neg_main_obj + pos_ent_obj +
                           neg_ent_obj) / self.ndim

            if debug:
                for w, lp in zip(weights, logp_vec):
                    print(f'w = {w:10.4}, prob = {torch.exp(lp):10.4}')
                # probs = self.get_probs(xin, debug=True)
                foo = torch.autograd.grad(min_obj,
                                          model.net[0].weight,
                                          retain_graph=True)
                print(foo)
                pdb.set_trace()

        if torch.isnan(min_obj):
            print(min_obj)
            pdb.set_trace()

        return min_obj

    @classmethod
    def sample_probs(cls, probs: torch.Tensor, index: int):
        """Draw one index per row from ``probs[..., index]``.

        The last axis of ``probs`` selects the dimension; the remaining
        trailing axis holds the (unnormalised) probabilities of the
        candidate values and is sampled with ``multinomial``.
        """
        p = probs[..., index]
        sample = p.multinomial(num_samples=1).squeeze(-1)
        return sample

    def sample_model(self, nsamples: int,
                     model: nn.Module) -> Tuple[torch.Tensor, torch.Tensor]:
        """Sample ``model`` ``nsamples`` times, one dimension after another.

        Parameters
        ----------
        nsamples: int
            number of samples
        model: nn.Module
            model to sample from; it is switched to eval mode

        Returns
        -------
        samples: Tuple[torch.Tensor, torch.Tensor]
            normalized samples (values in [-1, 1]) / sample indices
        """
        model.eval()
        dim = self.ndim

        # GPU friendly sampling: evaluate all candidates of a batch at once
        if self.device != torch.device('cpu'):
            total_niter = -(-nsamples // self.bsize)  # ceil division
            xsample_list, xsample_ind_list = [], []
            for iter_cnt in range(total_niter):
                if iter_cnt == total_niter - 1:
                    # the last batch holds the remainder
                    bsize = nsamples - iter_cnt * self.bsize
                else:
                    bsize = self.bsize
                xsample = torch.zeros(bsize, dim, device=self.device)
                xsample_ind = torch.zeros(bsize, dim, device=self.device)
                for i in range(dim):
                    n = len(self.input_vectors_norm[i])
                    xin = torch.zeros(bsize, n, dim, device=self.device)
                    if i >= 1:
                        xin = torch.stack([xsample] * n, dim=-2)
                    in_torch = torch.from_numpy(
                        self.input_vectors_norm[i]).to(self.device)
                    # try every candidate value of dimension i
                    xin[..., i] = torch.stack([in_torch] * bsize)
                    xin_reshaped = xin.view((bsize * n, dim))
                    probs_reshaped = self.get_probs(xin_reshaped, model=model)
                    probs = probs_reshaped.view((bsize, n, dim))
                    xi_ind = self.sample_probs(probs, i)  # ith x index
                    xsample[:, i] = xin[..., i][range(bsize), xi_ind]
                    xsample_ind[:, i] = xi_ind
                xsample_ind_list.append(xsample_ind)
                xsample_list.append(xsample)

            xsample = torch.cat(xsample_list, dim=0)
            xsample_ind = torch.cat(xsample_ind_list, dim=0)
            return xsample, xsample_ind
        else:
            samples = []
            samples_ind = []
            for k in range(nsamples):
                xsample = torch.zeros(1, dim)
                xsample_ind = torch.zeros(1, dim)
                for i in range(dim):
                    n = len(self.input_vectors_norm[i])
                    xin = torch.zeros(n, dim)
                    if i >= 1:
                        xin = torch.stack([xsample.squeeze()] * n)
                    xin[:, i] = torch.from_numpy(self.input_vectors_norm[i])
                    # TODO: For normal dist this probs gets a lot of mass on the edges
                    probs = self.get_probs(xin, model=model)
                    xi_ind = self.sample_probs(probs, i)  # ith x index
                    xsample[0, i] = torch.tensor(
                        self.input_vectors_norm[i][xi_ind])
                    xsample_ind[0, i] = xi_ind
                samples.append(xsample.squeeze())
                samples_ind.append(xsample_ind.squeeze())

            samples = torch.stack(samples, dim=0)
            samples_ind = torch.stack(samples_ind, dim=0)
            return samples, samples_ind

    def run_epoch(self,
                  data: np.ndarray,
                  weights: np.ndarray,
                  model: nn.Module,
                  mode='train',
                  debug=False):
        """Run one pass over ``data`` and return the mean loss per step.

        ``data[:, 0, :]`` is used as the model input. In ``'train'`` mode
        the data is processed in batches and the optimizer is stepped;
        otherwise the whole data set is evaluated in a single step. When
        ``model`` is ``self.model`` the weighted objective is used; any
        other model is trained with the plain negative log-likelihood of
        ``self.visited_dist``.
        """
        # for model in [self.visited_dist, self.model]:
        model.train() if mode == 'train' else model.eval()

        n, dim, _ = data.shape
        assert n != 0, 'no data found'

        bsize = max(self.bsize, 2**math.floor(math.log2(
            n / 4))) if mode == 'train' else n
        nstep = n // bsize if mode == 'train' else 1

        nll_per_b = 0
        for step in range(nstep):
            xb = data[step * bsize:step * bsize + bsize]
            wb = weights[step * bsize:step * bsize + bsize]
            xb_tens = torch.from_numpy(xb).to(self.device)
            wb_tens = torch.from_numpy(wb).to(self.device)

            xin = xb_tens[:, 0, :]
            if model is self.model:
                loss = self.get_nll(xin,
                                    weights=wb_tens,
                                    model=self.model,
                                    debug=debug)
            else:
                loss = self.get_nll(xin, model=self.visited_dist)

            if mode == 'train':
                self.opt.zero_grad()
                loss.backward()
                # clip the gradients of the model that was just
                # back-propagated (previously always self.model, which left
                # the visited-state model unclipped)
                nn.utils.clip_grad_norm_(model.parameters(), 1e3)
                self.opt.step()
            nll_per_b += loss.to(self.cpu).item() / nstep

        return nll_per_b

    def collect_samples(self, n_samples, uniform=False):
        """Evaluate new samples and add them to the buffer.

        Samples are drawn from ``self.model`` unless ``uniform`` is set or
        too many consecutive draws were already in the buffer, in which case
        ``Random.sample_data`` is used. Every evaluated value is recorded in
        ``self.fvals``. Returns the list of newly added (unnormalised)
        samples.
        """
        n_collected = 0
        new_samples = []
        vecs = self.input_vectors
        norm_vecs = self.input_vectors_norm
        # a counter for corner cases to tell the algorithm to explore more
        n_repetitive_samples = 0
        while n_collected < n_samples:
            if uniform or n_repetitive_samples > 1e2 * n_samples:
                _, xnew_id_np = Random.sample_data(self.ndim,
                                                   self.input_vectors_norm, 1)
                xnew_id_np = xnew_id_np.astype('int')
            else:
                _, xnew_ind = self.sample_model(1, model=self.model)
                xnew_id_np = xnew_ind.to(self.cpu).data.numpy().astype('int')

            # simulate and compute the adjustment weights
            org_sample_list = [
                vecs[index][pos] for index, pos in enumerate(xnew_id_np[0, :])
            ]
            xsample = np.array(org_sample_list, dtype='float32')
            fval = self.fn(xsample[None, :])
            self.fvals.add(fval)

            norm_sample_list = [
                norm_vecs[index][pos]
                for index, pos in enumerate(xnew_id_np[0, :])
            ]
            norm_sample = np.array(norm_sample_list, dtype='float32')

            if self.allow_repeated or norm_sample not in self.buffer:
                self.buffer.add_samples(norm_sample[None, :], xnew_id_np, fval)
                new_samples.append(xsample)
                n_collected += 1
            else:
                n_repetitive_samples += 1
                print(f'item {norm_sample} already exists!')

        return new_samples

    def _clip_and_round(self, samples):
        """Clip ``samples`` to the valid index range and floor to integers."""
        lo = np.zeros(samples.shape) + self.params_min
        hi = np.zeros(samples.shape) + self.params_max
        out_samples = np.clip(samples, lo, hi)
        out_samples = np.floor(out_samples).astype('int')
        return out_samples

    def _sample_model_with_weights(self, nsample):
        """Sample the model and compute a training weight for each sample.

        The reference value is the ``self.cut_off``-th best function value
        of the batch. Returns the stacked (normalised value, index) array
        and the weights.
        """
        xsample, xsample_ids, fvals = self._sample_model_for_eval(nsample)
        xsample_norm = index_to_xval(self.input_vectors_norm, xsample_ids)
        zavg = sorted(fvals, reverse=(self.mode == 'ge'))[self.cut_off]
        print(f'fref: {zavg}')
        if self.weight_type == 'ind':
            weights = weight2(fvals, self.goal, zavg, self.mode,
                              self.problem_type)
        else:
            weights = weight(fvals, self.goal, zavg, self.mode)
        # weights = self.update_weight(xsample, weights)
        return np.stack([xsample_norm, xsample_ids], axis=1), weights

    def update_weight(self, xin, wtr):
        """Adjust and normalise the training weights ``wtr`` for ``xin``.

        Depending on the configuration this subtracts a normalised entropy
        term, subtracts an exploration penalty based on the visited-state
        model, and standardises the weights (zero mean, unit std).
        """
        eps_tens = 1e-15
        xin_tens = torch.from_numpy(xin).to(self.device)

        normalized = False
        if self.add_ent_before_norm:
            probs = self.get_probs(xin_tens, model=self.model)
            logp_vec = (probs + eps_tens).log().sum(-1).cpu().data.numpy()
            ent_term = (1 + logp_vec) / self.ndim
            ent_term = (ent_term - ent_term.mean()) / (ent_term.std() +
                                                       eps_tens)
            # this normalization happens regardless of self.normalize_weight flag
            wtr = (wtr - wtr.mean()) / (wtr.std() + eps_tens)
            wtr = wtr - self.beta * ent_term
            if self.beta != 0 and self.normalize_weight:
                wtr = (wtr - wtr.mean()) / (wtr.std() + eps_tens)
                normalized = True

        # this is just an experimentation
        # normalize before adding exploration penalty
        if self.explore_coeff is not None:
            probs_visited = self.get_probs(xin_tens, model=self.visited_dist)
            logp_visited = (probs_visited +
                            eps_tens).log().sum(-1) / self.ndim / 2
            probs = self.get_probs(xin_tens, model=self.model)
            logp_vec = (probs + eps_tens).log().sum(-1)
            is_coeff = (logp_vec - logp_visited).exp()
            is_coeff = is_coeff / is_coeff.max()
            print('std: ', is_coeff.std())
            # move to the CPU first: Tensor.numpy() fails on CUDA tensors
            logp_visited = logp_visited.cpu().data.numpy()
            wtr = wtr - self.explore_coeff * logp_visited / (
                is_coeff.std() + eps_tens).item()
            if self.normalize_weight:
                wtr = (wtr - wtr.mean()) / (wtr.std() + eps_tens)
                normalized = True

        if not normalized and self.normalize_weight:
            wtr = (wtr - wtr.mean()) / (wtr.std() + eps_tens)

        return wtr

    def train(self, iter_cnt: int, nepochs: int, split=1.0):
        """Train the model(s) for one outer iteration.

        Parameters
        ----------
        iter_cnt:
            Outer iteration counter; 0 denotes the initial training round.
        nepochs:
            Number of epochs for the main model.
        split:
            Train fraction passed to the buffer; a test loss is computed
            only when ``split < 1``.

        Returns
        -------
        ``(tr_loss, te_loss)`` when ``split < 1``, otherwise
        ``(tr_loss, tr_loss_list)`` where ``tr_loss`` is the mean epoch loss
        and ``tr_loss_list`` holds the loss of every epoch.
        """
        # treat the sampled data as a static data set and take some gradient steps on it
        print('-' * 50)
        if self.on_policy and iter_cnt != 0:
            # TODO: this is a stupid implementation, but ok for now
            xtr, wtr = self._sample_model_with_weights(self.nsamples)
        else:
            xtr, xte, wtr, wte = self.buffer.draw_tr_te_ds(
                split=split, normalize_weight=False)

        if self.model_visited:
            print('Training buffer model:')
            # Keep the visited-model epoch count in its own variable so the
            # ``nepochs`` argument still controls the main model below
            # (it used to be overwritten here).
            visited_nepochs = self.init_nepochs if iter_cnt == 0 else self.nepoch_visited
            for epoch_id in range(visited_nepochs):
                tr_nll = self.run_epoch(xtr,
                                        wtr,
                                        self.visited_dist,
                                        mode='train',
                                        debug=False)
                print(f'[visit_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')
            print('Finshed training buffer model')

            if (iter_cnt) % 10 == 0 and self.ndim == 2:
                _, xvisited_ind = self.sample_model(1000,
                                                    model=self.visited_dist)
                self._plot_dist(xvisited_ind, 'dist', 'visited',
                                f'{iter_cnt+1}')

        update_w = self.update_weight(xtr[:, 0, :], wtr)
        # debug (disabled: iter_cnt is never below -1 in normal operation)
        if iter_cnt < -1:
            values = index_to_xval(self.input_vectors,
                                   xtr[:, 1, :].astype('int'))
            fvals = self.fn(values)
            wtr_norm = (wtr - wtr.mean()) / (wtr.std() + 1e-15)
            fref = sorted(fvals)[self.cut_off - 1]
            print(f'fred = {fref}')
            cond = np.logical_and(fvals >= 20, fvals <= fref)
            for index, wp, wn, wnorm in zip(xtr[:, 1, :][cond], wtr[cond],
                                            update_w[cond], wtr_norm[cond]):
                print(f'index = {index}, weight_before_update = {wp:.4f}, '
                      f'weights_norm = {wnorm:.4f}, '
                      f'weight_after_update = {wn:.4f}')
            pdb.set_trace()
        wtr = update_w

        if self.ndim == 2:
            fpath = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{iter_cnt}_before')
            samples = index_to_xval(self.input_vectors,
                                    xtr[:, 1, :].astype('int'))
            s = self.input_scale
            plt_hist2D(samples,
                       fpath=fpath,
                       range=np.array([[-s, s], [-s, s]]),
                       cmap='binary')

        # per epoch
        tr_loss = 0
        te_loss = 0
        tr_loss_list = []
        print(f'Training model: fref = {self.buffer.zavg}')
        for epoch_id in range(nepochs):
            tr_nll = self.run_epoch(xtr,
                                    wtr,
                                    self.model,
                                    mode='train',
                                    debug=False)
            tr_loss_list.append(tr_nll)
            # average over the epochs actually run, not self.nepochs
            tr_loss += tr_nll / nepochs

            # self.writer.add_scalar('loss', tr_nll, epoch_id)
            print(f'[train_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')

            if split < 1:
                te_nll = self.run_epoch(xte, wte, self.model, mode='test')
                te_loss += te_nll / nepochs
                print(f'[test_{iter_cnt}] epoch {epoch_id} loss = {te_nll}')

        print('Finished training model.')
        if split < 1:
            return tr_loss, te_loss

        return tr_loss, tr_loss_list

    def save_checkpoint(self, saved_dict):
        """Add buffer/model/optimizer state to ``saved_dict`` and save it.

        The dictionary is updated in place and written to
        ``checkpoint.tar`` in the working directory.
        """
        saved_dict.update(
            dict(buffer=self.buffer,
                 model_state=self.model.state_dict(),
                 opt_state=self.opt.state_dict()))
        if self.model_visited:
            saved_dict.update(dict(visited=self.visited_dist.state_dict()))
        torch.save(saved_dict, self.work_dir / 'checkpoint.tar')

    def load_checkpoint(self, ckpt_path: Union[str, Path]):
        """Restore model, optimizer and buffer from ``ckpt_path``.

        Returns the remaining entries of the checkpoint dictionary.

        NOTE(review): ``torch.load`` unpickles the stored buffer object;
        only load checkpoints from trusted sources.
        """
        s = time.time()
        checkpoint = torch.load(ckpt_path, map_location=self.device)
        self.model.load_state_dict(checkpoint.pop('model_state'))
        if self.model_visited:
            self.visited_dist.load_state_dict(checkpoint.pop('visited'))
            params = list(self.model.parameters()) + list(
                self.visited_dist.parameters())
        else:
            params = self.model.parameters()
        self.opt.load_state_dict(checkpoint.pop('opt_state'))
        # override optimizer with input parameters
        self.opt = optim.Adam(params, self.lr)
        self.buffer = checkpoint.pop('buffer')
        print(f'Model checkpoint loaded in {time.time() - s:.4f} seconds')
        return checkpoint

    def setup_model(self):
        """Create the model(s), the optimizer and the sample buffer.

        When initial buffer paths were given, every entry of the referenced
        buffers is copied into the new buffer.
        """
        dim = self.ndim
        nparams_per_dim_mix = 2 if self.fixed_sigma else 3
        self.model: nn.Module = MADE(dim,
                                     self.hiddens,
                                     dim * nparams_per_dim_mix * self.nr_mix,
                                     seed=self.seed,
                                     natural_ordering=True)
        self.model.to(self.device)

        if self.model_visited:
            nparams = 2 if self.visited_fixed_sigma else 3
            self.visited_dist: nn.Module = MADE(dim,
                                                self.hiddens,
                                                dim * nparams *
                                                self.visited_nr_mix,
                                                seed=self.seed,
                                                natural_ordering=True)
            self.visited_dist.to(self.device)
            params = list(self.model.parameters()) + list(
                self.visited_dist.parameters())
        else:
            params = list(self.model.parameters())

        self.opt = optim.Adam(params, lr=self.lr, weight_decay=0)
        self.buffer = CacheBuffer(self.mode, self.goal, self.cut_off,
                                  self.allow_repeated, self.problem_type)

        if self.init_buffer_paths:
            for path in self.init_buffer_paths:
                # init_buffer can be either the final buffer of another algorithm (checkpoint.tar)
                # or the init_buffer of another one (init_buffer.pickle)
                # NOTE(review): both loaders unpickle arbitrary objects; only
                # use files from trusted sources.
                if str(path).endswith('checkpoint.tar'):
                    ref_buffer: CacheBuffer = torch.load(
                        path, map_location=self.device)['buffer']
                else:
                    # read the current path (the whole list of paths was
                    # passed here before)
                    ref_buffer: CacheBuffer = read_pickle(path)['init_buffer']

                for ind in ref_buffer.db_set:
                    self.buffer.add_samples(ind.item[None, :],
                                            ind.item_ind[None, :],
                                            np.array([ind.val]))
            print('Buffer initialized with the provided initialization.')

    def setup_model_state(self):
        """Load the saved state or build the initial one.

        When loading, the bookkeeping lists are restored from the
        checkpoint. Otherwise initial samples are collected uniformly, the
        buffer is pickled, the model is trained for ``init_nepochs`` epochs
        and a first checkpoint is written.

        Returns
        -------
        ``(iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list,
        n_sols_in_buffer, top_means)``
        """
        # load the model or proceed without loading checkpoints
        if self.load:
            ckpt_dict = self.load_checkpoint(self.work_dir / 'checkpoint.tar')
            tr_losses = ckpt_dict['tr_losses']
            iter_cnt = ckpt_dict['iter_cnt']
            avg_cost = ckpt_dict['avg_cost']
            sim_cnt_list = ckpt_dict['sim_cnt']
            n_sols_in_buffer = ckpt_dict['n_sols_in_buffer']
            sample_cnt_list = ckpt_dict['sample_cnt']
            top_means = dict(top_20=ckpt_dict['top_20'],
                             top_40=ckpt_dict['top_40'],
                             top_60=ckpt_dict['top_60'])
        else:
            # collect samples using the random initial model (probably a bad initialization)
            iter_cnt = 0
            tr_losses, avg_cost, \
                sim_cnt_list, sample_cnt_list, n_sols_in_buffer = [], [], [], [], []
            top_means = dict(top_20=[], top_40=[], top_60=[])
            self.model.eval()
            self.collect_samples(self.n_init_samples, uniform=True)
            write_pickle(self.work_dir / 'init_buffer.pickle',
                         dict(init_buffer=self.buffer))
            # train the init model
            self.model.train()
            self.train(0, self.init_nepochs)

            if self.ndim == 2:
                _, xdata_ind = self.sample_model(1000, model=self.model)
                self._plot_dist(xdata_ind, 'dist', 'training', '0_after')

            saved_data = dict(
                iter_cnt=iter_cnt,
                tr_losses=tr_losses,
                avg_cost=avg_cost,
                sim_cnt=sim_cnt_list,
                n_sols_in_buffer=n_sols_in_buffer,
                sample_cnt=sample_cnt_list,
                **top_means,
            )
            self.save_checkpoint(saved_data)

        return iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list, n_sols_in_buffer, \
            top_means

    def _plot_dist(self, data_indices: torch.Tensor, name, prefix, suffix):
        """Plot a 2D histogram of the samples referenced by ``data_indices``.

        The figure path is built from ``name``, ``prefix`` and ``suffix``
        inside the working directory.
        """
        fpath = self.work_dir / get_full_name(name, prefix, suffix)
        data_ind = data_indices.to(self.cpu).data.numpy().astype('int')
        data = index_to_xval(self.input_vectors, data_ind)
        s = self.input_scale
        _range = np.array([[-s, s], [-s, s]])
        plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')

    def _run_alg(self):
        """Run the main loop: collect samples, train, checkpoint, then plot."""
        self.setup_model()
        ret = self.setup_model_state()
        iter_cnt, tr_losses, avg_cost, \
            sim_cnt_list, sample_cnt_list, n_sols_in_buffer, top_means = ret

        while iter_cnt < self.niter:
            print(f'iter {iter_cnt}')
            # ---- update plotting variables
            sim_cnt_list.append(self.buffer.size)
            n_sols_in_buffer.append(self.buffer.n_sols)
            sample_cnt_list.append(self.buffer.tot_freq)
            top_means['top_20'].append(np.mean(self.fvals[:20]))
            top_means['top_40'].append(np.mean(self.fvals[:40]))
            top_means['top_60'].append(np.mean(self.fvals[:60]))
            # top_means['top_20'].append(self.buffer.topn_mean(20))
            # top_means['top_40'].append(self.buffer.topn_mean(40))
            # top_means['top_60'].append(self.buffer.topn_mean(60))

            self.collect_samples(self.nsamples)
            avg_cost.append(self.buffer.mean)

            if iter_cnt == self.niter - 1 and self.full_training:
                # the last iteration trains 40x longer
                tr_loss, tr_loss_list = self.train(iter_cnt + 1,
                                                   self.nepochs * 40)
            else:
                tr_loss, tr_loss_list = self.train(iter_cnt + 1, self.nepochs)
            tr_losses.append(tr_loss_list)

            if (iter_cnt + 1) % 10 == 0 and self.ndim == 2:
                _, xdata_ind = self.sample_model(1000, model=self.model)
                self._plot_dist(xdata_ind, 'dist', 'training',
                                f'{iter_cnt+1}_after')

            iter_cnt += 1
            saved_data = dict(
                iter_cnt=iter_cnt,
                tr_losses=tr_losses,
                avg_cost=avg_cost,
                sim_cnt=sim_cnt_list,
                n_sols_in_buffer=n_sols_in_buffer,
                sample_cnt=sample_cnt_list,
                **top_means,
            )
            self.save_checkpoint(saved_data)

        plot_learning_with_epochs(fpath=self.work_dir / 'learning_curve.png',
                                  training=tr_losses)
        plot_cost(avg_cost, fpath=self.work_dir / 'cost.png')
        plot_x_y(
            sample_cnt_list,
            n_sols_in_buffer,
            # annotate=sim_cnt_list,marker='s', fillstyle='none'
            fpath=self.work_dir / 'n_sols.png',
            xlabel='n_freq',
            ylabel=f'n_sols')

    def _sample_model_for_eval(
            self, nsamples) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Sample the model and evaluate the benchmark function.

        Returns the unnormalised samples, their indices and the function
        values.
        """
        _, sample_ids = self.sample_model(nsamples, model=self.model)
        sample_ids_arr = sample_ids.long().to(torch.device('cpu')).numpy()
        xsample_arr = index_to_xval(self.input_vectors, sample_ids_arr)
        fval = self.fn(xsample_arr)
        return xsample_arr, sample_ids_arr, fval

    def report_variation(self, nsamples):
        """Print the empirical variation of all and of satisfying samples.

        Raises ``ValueError`` when the variation of the satisfying samples
        is NaN (no satisfying solution was found).
        """
        xsample, _, fval = self._sample_model_for_eval(nsamples)

        total_var = compute_emprical_variation(xsample)
        if self.mode == 'le':
            pos_samples = xsample[fval <= self.goal]
            pos_var = compute_emprical_variation(pos_samples)
        else:
            pos_samples = xsample[fval >= self.goal]
            pos_var = compute_emprical_variation(pos_samples)

        print(f'total solution variation / dim = {total_var:.6f}')
        if np.isnan(pos_var):
            raise ValueError('did not find any satisfying solutions!')
        print(f'pos solution variation / dim = {pos_var:.6f}')

    def report_accuracy(self, ntimes, nsamples):
        """Estimate the fraction of sampled solutions that reach the goal.

        Sampling is repeated ``ntimes`` with ``nsamples`` samples each.
        Returns ``(accuracy mean in %, accuracy std in %, mean diversity)``;
        the diversity is 0 when it could never be computed.
        """
        accuracy_list, times, div_list = [], [], []

        if self.ndim == 2:
            xsamples, _, _ = self._sample_model_for_eval(nsamples)
            s = self.input_scale
            _range = np.array([[-s, s], [-s, s]])
            plt_hist2D(xsamples,
                       range=_range,
                       fpath=self.work_dir / get_full_name('trained_policy'),
                       cmap='binary')

        for iter_id in range(ntimes):
            s = time.time()
            xsample, sample_ids, fval = self._sample_model_for_eval(nsamples)
            if self.mode == 'le':
                acc = (fval <= self.goal).sum(-1) / nsamples
                pos_samples = xsample[fval <= self.goal]
            else:
                acc = (fval >= self.goal).sum(-1) / nsamples
                pos_samples = xsample[fval >= self.goal]

            if len(pos_samples) >= self.ndim:
                div = get_diversity_fom(self.ndim, pos_samples)
                div_list.append(div)

            times.append(time.time() - s)
            accuracy_list.append(acc)

        acc_mean = 100 * float(np.mean(accuracy_list))
        acc_std = 100 * float(np.std(accuracy_list))
        acc_div = float(np.mean(div_list)) if div_list else 0
        print(
            f'gen_time / sample = {1e3 * np.mean(times).astype("float") / nsamples:.3f} ms'
        )
        print(f'accuracy_avg = {acc_mean:.6f}, accuracy_std = {acc_std:.6f}, '
              f'solution diversity = {acc_div:.6f}')

        return acc_mean, acc_std, acc_div

    def load_and_sample(self, nsamples, only_positive=False) -> np.ndarray:
        """Set up the model and its state, then generate samples.

        With ``only_positive`` the model is re-sampled until at least
        ``nsamples`` goal-satisfying samples were gathered; all of them are
        returned (possibly more than ``nsamples``).
        """
        self.setup_model()
        self.setup_model_state()

        xsample, _, fval = self._sample_model_for_eval(nsamples)
        if not only_positive:
            return xsample

        n_remaining = nsamples
        ans_list = []
        while n_remaining > 0:
            if self.mode == 'le':
                pos_samples = xsample[fval <= self.goal]
            else:
                pos_samples = xsample[fval >= self.goal]
            ans_list.append(pos_samples)
            n_remaining -= len(pos_samples)
            print(
                f"sampled {len(pos_samples)} pos_solutions, n_remaining: {n_remaining}"
            )
            if n_remaining > 0:
                xsample, _, fval = self._sample_model_for_eval(n_remaining)

        ans = np.concatenate(ans_list, axis=0)
        return ans

    def plot_model_sol_pca(self, nsamples=100):
        """Plot a 2D PCA of the goal-satisfying samples to ``pca_sol.png``."""
        xsample, sample_ids, fval = self._sample_model_for_eval(nsamples)
        if self.mode == 'le':
            pos_samples = xsample[fval <= self.goal]
        else:
            pos_samples = xsample[fval >= self.goal]
        plot_pca_2d(pos_samples, fpath=self.work_dir / f'pca_sol.png')

    def report_entropy(self, ntimes, nsamples):
        """Estimate and print the model entropy per dimension.

        The entropy is the negative mean log-probability of samples drawn
        from the model, averaged over ``ntimes`` repetitions.
        """
        eps_tens = 1e-15
        ent_list = []
        for iter_cnt in range(ntimes):
            samples, _ = self.sample_model(nsamples, self.model)
            probs = self.get_probs(samples, self.model)
            # Sum the logs instead of taking the log of the product: the
            # product underflows to 0 (log -> -inf) in high dimensions.
            logp = (probs + eps_tens).log().sum(-1)
            ent_list.append(-logp.mean().item())

        ent = float(np.mean(ent_list) / self.ndim)
        print(f'entropy/dim: {ent}')
        return ent

    def check_solutions(self, ntimes=1, nsamples=1000):
        """Report accuracy and entropy and write them to ``performance.yaml``."""
        print('-------- REPORT --------')
        # self.check_random_solutions(ntimes, nsamples)
        acc, std, divesity = self.report_accuracy(ntimes, nsamples)
        ent = self.report_entropy(ntimes, nsamples)

        saved_data = dict(acc=acc, std=std, divesity=divesity, ent=ent)
        write_yaml(self.work_dir / 'performance.yaml', saved_data)
        # self.report_variation(nsamples)
        # self.plot_model_sol_pca()

    def check_random_solutions(self, ntimes, nsamples):
        """Run the same solution check with a ``Random`` policy."""
        rnd_specs = deepcopy(self.specs)
        rnd_params = rnd_specs['params']
        rnd_params['work_dir'] = self.work_dir
        random_policy = Random(spec_dict=rnd_specs)
        random_policy.check_solutions(ntimes, nsamples)

    def main(self) -> None:
        """Seed the generators, run the algorithm and report the results."""
        # self.check_random_solutions(ntimes=10, nsamples=10)
        # input('Press Enter To continue:')
        self.set_seed(self.seed)
        self._run_alg()
        self.check_solutions(ntimes=10, nsamples=100)
def test_insert():
    """Insert values at order-preserving positions and validate each time."""
    ten = SortedList(range(10), load=4)
    # negative, far-negative and past-the-end positions
    for position, value in ((-1, 9), (-100, 0), (100, 10)):
        ten.insert(position, value)
        ten._check()

    empty = SortedList(load=4)
    empty.insert(0, 5)
    empty._check()

    # repeatedly insert a new minimum at the front
    shifted = SortedList(range(5, 15), load=4)
    for _ in range(8):
        shifted.insert(0, 4)
        shifted._check()

    # insert a duplicate right at its own position
    dup = SortedList(range(10), load=4)
    dup.insert(8, 8)
    dup._check()
from scipy.stats import pearsonr
import warnings

warnings.filterwarnings("ignore")

#########################################################
# Reducer:
#########################################################

# running state for the date key currently being reduced
last_date_key = None
count_per_date = 0
favs_per_dt = 0
rt_per_dt = 0
aggregate_sentiment = 0
aggregate_sentiment_rnd = 0  # new variable for categorical sentiment

# covid count:
aggregate_covid_count = 0

# Every container starts with a single 0 so that, once a real value is
# added, it holds at least 2 entries. This enables correlation and standard
# deviation where a date has < 2 values (not meaningful anyway).
sent_list_sort = SortedList([0])
list_sentiment = [0]
list_sentiment_rnd = [0]
favs_to_follower = [0]
rt_to_follower = [0]
def test_index_valueerror7():
    """Look up a value missing from the list within the range [0, 10).

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    zeros_then_twos = [0] * 10 + [2] * 10
    sorted_values = SortedList(zeros_then_twos, load=4)
    sorted_values.index(1, 0, 10)
def test_append_valueerror():
    """Append a value that is smaller than the current maximum.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    hundred = SortedList(range(100))
    hundred.append(5)
def test_repr():
    """``repr`` shows the contained values and the load factor."""
    digits = SortedList(range(10), load=4)
    expected = 'SortedList([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], load=4)'
    assert repr(digits) == expected
def test_index_valueerror6():
    """Look up a present value starting the search after its position.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    digits = SortedList(range(10), load=4)
    digits.index(3, 5)
def test_insert_valueerror4():
    """Insert a value at a position that would break the sort order.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    digits = SortedList(range(10), load=4)
    digits.insert(5, 7)
def test_extend_valueerror2():
    """Extend with values that start below the current maximum.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    twenty = SortedList(range(20), load=4)
    overlapping = [17, 18, 19, 20, 21, 22, 23]
    twenty.extend(overlapping)
def test_len():
    """The length grows by one with every added value."""
    growing = SortedList()
    for expected_len, val in enumerate(range(10000), start=1):
        growing.add(val)
        assert len(growing) == expected_len
def test_index_valueerror5():
    """Look up a value in an empty list.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    empty = SortedList()
    empty.index(1)
def test_index_valueerror2():
    """Look up a value with a stop position of -10 on a 10-element list.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    zeros = SortedList([0] * 10, load=4)
    zeros.index(0, 0, -10)
def test_build_index():
    """Building the index of a single-element list keeps it consistent."""
    single = SortedList([0], load=4)
    single._build_index()
    single._check()
def test_index_valueerror4():
    """Look up a value larger than every element of the list.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    zeros = SortedList([0] * 10, load=4)
    zeros.index(1)
def test_imul():
    """In-place multiplication repeats the values and keeps them sorted."""
    digits = SortedList(range(10), load=4)
    digits *= 5
    digits._check()
    expected = sorted(list(range(10)) * 5)
    assert digits == expected
def test_pop_indexerror2():
    """Pop at an index one past the end of the list.

    Presumably expected to raise ``IndexError``; the expectation wrapper is
    not visible in this snippet.
    """
    digits = SortedList(range(10), load=4)
    digits.pop(10)
def test_extend_valueerror1():
    """Extend an empty list with values that are not in sorted order.

    Presumably expected to raise ``ValueError``; the expectation wrapper is
    not visible in this snippet.
    """
    empty = SortedList()
    unsorted_values = [1, 2, 3, 5, 4, 6]
    empty.extend(unsorted_values)
def test_extend():
    """Extend with increasing values, also after rebuilding the index."""
    target = SortedList(load=17)

    target.extend(range(100))
    target._check()

    target.extend(list(range(100, 200)))
    target._check()

    for val in range(200, 300):
        # drop and rebuild the positional index before every extension
        del target._index[:]
        target._build_index()
        repeats = val - 199
        target.extend([val] * repeats)
        target._check()
def test_repr_recursion():
    """A list that contains itself is rendered with an ellipsis."""
    nested = SortedList([[1], [2], [3], [4]])
    nested.append(nested)
    expected = 'SortedList([[1], [2], [3], [4], ...], load=1000)'
    assert repr(nested) == expected
def collect_matches():
    """Crawl matches outwards from one seed summoner.

    Keeps four sorted id collections (summoners and matches, each split into
    "still to pull" and "already pulled"). A random unpulled summoner is
    taken, the ids of its filtered match history are queued, and every
    queued match contributes its not-yet-seen participants to the summoner
    queue. The loop ends when no unpulled summoner is left.
    """
    initial_summoner_name = "GustavEnk"
    region = "EUW"

    seed_summoner = Summoner(name=initial_summoner_name, region=region)
    patch_720 = Patch.from_str("7.20", region=region)

    summoners_todo = SortedList([seed_summoner.id])
    summoners_done = SortedList()
    matches_todo = SortedList()
    matches_done = SortedList()

    while summoners_todo:
        # Pick a random summoner that has not been pulled yet and queue the
        # matches of its history
        current_id = random.choice(summoners_todo)
        current_summoner = Summoner(id=current_id, region=region)
        history = filter_match_history(current_summoner, patch_720)
        matches_todo.update([entry.id for entry in history])
        summoners_todo.remove(current_id)
        summoners_done.add(current_id)

        while matches_todo:
            # Pick a random queued match
            match_id = random.choice(matches_todo)
            fetched_match = Match(id=match_id, region=region)
            for participant in fetched_match.participants:
                already_known = (participant.summoner.id in summoners_done
                                 or participant.summoner.id in summoners_todo)
                if not already_known:
                    summoners_todo.add(participant.summoner.id)
            # Iterating over the participants above triggers the match to
            # load its data. If you have a database in your datapipeline,
            # the match will automatically be stored in it.
            matches_todo.remove(match_id)
            matches_done.add(match_id)
def test_update():
    """Repeated ``update`` calls accumulate all values in sorted order."""
    merged = SortedList()
    running_total = 0
    for batch_size, expected_total in ((1000, 1000), (100, 1100),
                                       (10000, 11100)):
        merged.update(range(batch_size))
        running_total += batch_size
        assert len(merged) == expected_total
        merged._check()

    expected = sorted(chain(range(1000), range(100), range(10000)))
    assert all(got == want for got, want in zip(merged, expected))