def __init__(self):
    self.timestamp = None
    self.trx_per_resource = defaultdict(int)
    self.trx_per_user = defaultdict(int)
    self.trx_per_method = defaultdict(int)
    self.trx_per_status = defaultdict(int)
    self.trx_per_sec = 0.0

def reset(self, timestamp):
    logger.debug("Resetting basic data")
    self.timestamp = timestamp
    self.trx_per_resource = defaultdict(int)
    self.trx_per_user = defaultdict(int)
    self.trx_per_method = defaultdict(int)
    self.trx_per_status = defaultdict(int)
    self.trx_per_sec = 0.0

def _iter_events_assemble(waveform: Dict[Any, Union[Stream, Trace]]):
    """Given a dict of the form {event_id: waveforms}, create a new dict of
    the structure {sampling_rate: {event_id: waveforms}}."""
    out = defaultdict(lambda: defaultdict(obspy.Stream))
    for item, value in waveform.items():
        if isinstance(value, Trace):
            value = Stream(traces=[value])
        for tr in value:
            sr = int(np.round(tr.stats.sampling_rate))
            out[sr][item] += tr
    return dict(out)

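# A minimal, self-contained sketch of the grouping pattern used above: a
# two-level defaultdict buckets items first by (rounded) sampling rate and
# then by event id, with no keys pre-created. The Rec tuple is a hypothetical
# stand-in for an obspy Trace.
from collections import defaultdict, namedtuple

Rec = namedtuple('Rec', ['event_id', 'sampling_rate'])

def group_by_rate(records):
    out = defaultdict(lambda: defaultdict(list))
    for rec in records:
        out[int(round(rec.sampling_rate))][rec.event_id].append(rec)
    return {sr: dict(events) for sr, events in out.items()}

# Example: 99.99 Hz rounds into the 100 Hz bucket
grouped = group_by_rate([Rec('e1', 100.0), Rec('e1', 99.99), Rec('e2', 40.0)])
assert sorted(grouped) == [40, 100] and len(grouped[100]['e1']) == 2
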
def __init__(self, name, defaults: Sequence[Tuple[Union[str, Tuple], object]],
             callables: List[NamedExecutable], seconds=60,
             starting_moment: datetime.datetime = None, **kwargs):
    super().__init__(name, defaults,
                     _default_factory=lambda: defaultdict(dict), **kwargs)
    self.metric_names: List[str] = [c[0] for c in callables]
    self.seconds = seconds
    # symbol, side -> trade
    self.storage: Dict[Tuple, Deque] = defaultdict(deque)
    self._callables: List[Callable[[List], float]] = [c[1] for c in callables]
    self._from: datetime.datetime = starting_moment
    self._skip_from = False

async def material_tree_json(request: web.Request):
    with session_scope() as sess:
        materials: List[Material] = (
            sess.query(Material)
            .filter(Material.enabled.is_(True))
            .order_by(Material.substance.asc(), Material.id.asc())
            .all())
        material_by_substance = defaultdict(list)
        for material in materials:
            if material.substance:
                material_by_substance[material.substance].append(material)
        tree = {
            'name': 'materials',
            'children': [{
                'name': subst,
                'text': subst,
                'children': [{
                    'name': mat.name,
                    'size': 120000,
                    'img': nginx_url(mat.get_data_path('previews/bmps.png')),
                } for mat in subst_mats if mat.substance],
            } for subst, subst_mats in material_by_substance.items()],
        }
        return web.json_response(tree, dumps=partial(json.dumps, indent=2))

def __init__(self, gamma, alpha, epsilon, epsilon_decay):
    self.epsilon_decay = epsilon_decay
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.n_actions = 4
    # Q-table: state -> array of action values, one per action
    self.Q = defaultdict(lambda: np.zeros(4))

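# Hedged usage sketch for the Q-table above: defaultdict(lambda: np.zeros(4))
# means any previously unseen state key yields a fresh zero-valued action
# array, so lookups and updates need no existence checks. The state encoding
# and values here are hypothetical.
import numpy as np
from collections import defaultdict

Q = defaultdict(lambda: np.zeros(4))
state, action, reward, alpha = (0, 3), 2, 1.0, 0.1
# Reading an unseen state silently creates its zero array
best_next = Q[(1, 3)].max()  # 0.0 for a brand-new state
Q[state][action] += alpha * (reward + best_next - Q[state][action])
assert Q[state][2] == 0.1
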
def alpha(patches):
    # split into runoff return, patch type, flowlength
    # subsets = [[], []]
    # # Filter on global flowlength
    # for patch in patches:
    #     if patch.model.runoff_return:
    #         subsets[0].append(patch)
    #     else:
    #         subsets[1].append(patch)
    subsets = [patches]

    new_subsets = []
    for subset in subsets:
        subset_dict = defaultdict(list)
        for patch in subset:
            subset_dict[patch.type].append(patch)
        new_subsets += subset_dict.values()
    subsets = new_subsets

    new_subsets = []
    for subset in subsets:
        # Sort patches on flowlength (FL)
        subset.sort(key=lambda x: sum(cell.FL for cell in x.RL + x.BR))
        # Split category into RESOLUTION equally sized groups of patches
        new_subsets += list(np.array_split(subset, RESOLUTION))
    subsets = new_subsets
    return subsets

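# A small sketch of the sort-then-split step above: np.array_split divides a
# sorted sequence into RESOLUTION nearly equal groups even when the length is
# not evenly divisible. The values are arbitrary stand-ins for patch objects.
import numpy as np

RESOLUTION = 3
patches = sorted([9, 2, 7, 4, 1, 8, 5])
groups = list(np.array_split(patches, RESOLUTION))
assert [len(g) for g in groups] == [3, 2, 2]
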
def get_top_subjects(self, top):
    """Select the most frequently occurring subjects."""
    frequencies = defaultdict(int)
    for sentence in self._sentences:
        for word in sentence:
            if word.part_sentence == PartSentence.Subject:
                # word.frequency is expected to be populated beforehand
                # (see calculate_frequencies)
                frequencies[word.lemma] = word.frequency
    return sorted(frequencies.items(), key=lambda x: -x[1])[:top]

def out_edges(self):
    out_edges = defaultdict(list)
    out_edge_modifiers = {}
    for f, t, *l in self.ontology.in_edges:
        out_edges[t].append(f)
        out_edge_modifiers[t, f] = l
    self.out_edge_modifiers = out_edge_modifiers
    return out_edges

def process_ipynb_output_results(cell_order, outputs):
    keys = list(outputs.keys())
    results = []
    for k in keys[:]:
        # cell order, result order, type
        c, r, t = split_output_name(k)
        if c == cell_order:
            results.append((r, t, outputs[k]))
    dict_results = defaultdict(dict)
    for r, t, o in results:
        dict_results[r][t] = o
    results = []
    for r, ts in dict_results.items():
        if 'htm' in ts:
            results.append(CR(ResultTypes.Stream, r, ts['htm'].decode('UTF-8'), 'text/html'))
        elif 'png' in ts:
            results.append(CR(ResultTypes.Image, r, ts['png'], 'image/png'))
        elif 'jpg' in ts:
            results.append(CR(ResultTypes.Image, r, ts['jpg'], 'image/jpg'))
        elif 'ksh' in ts:
            results.append(CR(ResultTypes.Stream, r, ts['ksh'].decode('UTF-8'), 'text/plain'))
        elif 'txt' in ts:
            results.append(CR(ResultTypes.Stream, r, ts['txt'].decode('UTF-8'), 'text/plain'))
        elif 'c' in ts:
            results.append(CR(ResultTypes.Stream, r, ts['c'].decode('UTF-8'), 'text/plain'))
        elif 'bat' in ts:
            results.append(CR(ResultTypes.Stream, r, ts['bat'].decode('UTF-8'), 'text/plain'))
        else:
            logger.error("Unknown result type %s\n%s", repr(ts)[:40], repr(r)[:40])
    return results

def get_context_data(self, **kwargs):
    context = super().get_context_data(**kwargs)
    party_id = kwargs["legacy_slug"]
    party = get_object_or_404(Party, legacy_slug=party_id)
    # Make the party emblems conveniently available in the context too:
    context["emblems"] = party.emblems.all()
    all_post_groups = self.election_data.posts.values_list(
        "group", flat=True).distinct()
    by_post_group = {
        pg: {"stats": None, "posts_with_memberships": defaultdict(list)}
        for pg in all_post_groups
    }
    for membership in (Membership.objects.filter(
            party=party,
            post_election__election=self.election_data,
            role=self.election_data.candidate_membership_role,
    ).select_related().prefetch_related("post", "person")):
        person = membership.person
        post = membership.post
        post_group = post.group
        by_post_group[post_group]["posts_with_memberships"][post].append({
            "membership": membership,
            "person": person,
            "post": post,
        })
    # That'll only find the posts that someone from the party is
    # actually standing for, so add any other posts...
    for post in self.election_data.posts.all():
        post_group = post.group
        post_group_data = by_post_group[post_group]
        posts_with_memberships = post_group_data["posts_with_memberships"]
        posts_with_memberships.setdefault(post, [])
    context["party"] = party
    context["party_name"] = party.name
    for post_group, data in by_post_group.items():
        posts_with_memberships = data["posts_with_memberships"]
        by_post_group[post_group]["stats"] = get_post_group_stats(
            posts_with_memberships)
        data["posts_with_memberships"] = sorted(
            posts_with_memberships.items(), key=lambda t: t[0].label)
    context["candidates_by_post_group"] = sorted(
        [(pg, data) for pg, data in by_post_group.items()
         if pg in all_post_groups],
        key=lambda k: k[0],
    )
    return context

def _split_by_sampling_rate(
        st: Union[obspy.Stream, obspy.Trace]) -> Dict[int, obspy.Stream]:
    """Split a stream into a dict of streams keyed by unique sampling rate."""
    if isinstance(st, obspy.Trace):
        # convert to a stream if a trace was passed
        st = obspy.Stream(traces=[st])
    # iterate and separate by sampling rate
    out = defaultdict(obspy.Stream)
    for tr in st:
        sr = int(np.round(tr.stats.sampling_rate))
        out[sr] += tr
    return dict(out)

def calculate_frequencies(self):
    """Compute the number of occurrences of each subject."""
    frequencies = defaultdict(int)
    for sentence in self._sentences:
        for word in sentence:
            if word.part_sentence == PartSentence.Subject:
                frequencies[word.lemma] += 1
    for sentence in self._sentences:
        for word in sentence:
            word.frequency = frequencies.get(word.lemma)

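# The counting loop above is the classic defaultdict(int) tally; as a point of
# comparison, collections.Counter produces the same mapping and adds
# most_common() for free. The toy lemmas below are illustrative only.
from collections import Counter, defaultdict

lemmas = ['cat', 'dog', 'cat', 'bird', 'cat']
freq = defaultdict(int)
for lemma in lemmas:
    freq[lemma] += 1
assert freq == Counter(lemmas)
assert Counter(lemmas).most_common(1) == [('cat', 3)]
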
def train(self, train_set):
    num_genre = defaultdict(lambda: 0)
    for t_set in train_set:
        for genre in t_set["genre"]:
            num_genre[genre] += 1 / len(t_set["genre"])
    self.genre_list = num_genre
    print(num_genre)
    word_counts = count_words(train_set)
    self.word_probs = word_probabilities(word_counts, num_genre, self.k)

def __init__(self, *, loop, address: str, port: int):
    self.loop = loop
    self.address = address
    self.port = port
    self.app = web.Application(loop=loop)
    self.app.router.add_routes([
        web.get('/api/v1/address', self.on_address),
        web.get('/api/v1/payments/{token_address}/{partner_address}',
                self.on_payment_info),
    ])
    self.handler = None
    self.server = None
    # mapping (token_address, partner_address) -> {nonce1: amount1, nonce2: amount2, ...}
    self.payments = defaultdict(dict)

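# Sketch of the payments bookkeeping above: defaultdict(dict) keyed by the
# (token_address, partner_address) pair gives a per-channel {nonce: amount}
# map that materializes on first write. Addresses and amounts are made up.
from collections import defaultdict

payments = defaultdict(dict)
channel = ('0xtoken', '0xpartner')  # hypothetical addresses
payments[channel][1] = 500          # nonce 1 -> amount
payments[channel][2] = 750          # nonce 2 -> amount
assert sum(payments[channel].values()) == 1250
assert payments[('0xtoken', '0xother')] == {}  # unseen channel is just empty
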
def __init__(self, text_path, dict_path, synonymous_dict_path):
    self._text_path = text_path
    self._dict_path = dict_path
    self._synonymous_dict_path = synonymous_dict_path
    # person_counter counts how often each person appears, e.g. {'a': 1, 'b': 2}.
    # person_per_paragraph lists the people mentioned in each paragraph,
    # e.g. [['a', 'b'], []].
    # relationships stores the relations between people: the key is person A
    # and the value is a dict mapping person B to an edge weight.
    self._person_counter = defaultdict(int)
    self._person_per_paragraph = []
    self._relationships = {}
    self._synonymous_dict = {}

def group_new_test_indices_by_patient(ds):
    tids = ds.field_by_name('id')
    pids = ds.field_by_name('patient_id')
    cats = ds.field_by_name('created_at')
    edates = ds.field_by_name('date_taken_specific')
    edate_los = ds.field_by_name('date_taken_between_start')
    edate_his = ds.field_by_name('date_taken_between_end')
    print('row count:', ds.row_count())
    patients = defaultdict(TestIndices)
    for i_r in range(ds.row_count()):
        patients[pids[i_r]].add(i_r)
    return patients

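# The pattern above relies on defaultdict calling a custom class with no
# arguments to build a per-patient accumulator on first access. A minimal
# stand-in for TestIndices, whose real definition is not shown here:
from collections import defaultdict

class Indices:
    def __init__(self):
        self.indices = []

    def add(self, i):
        self.indices.append(i)

patients = defaultdict(Indices)
for patient_id, row in [('p1', 0), ('p2', 1), ('p1', 2)]:
    patients[patient_id].add(row)
assert patients['p1'].indices == [0, 2]
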
def scan_folders_for_files(folders):
    """
    scan_folders_for_files loops through the specified folders to look for their files

    :param folders: a list of paths to folders
    :return: files: a dictionary with a file stem as key and as value a list
        of the paths that lead to files with that stem
    """
    files = defaultdict(list)
    for folder in folders:
        if not folder.is_dir():
            io.warn("Folder %s doesn't exist" % folder)
            continue
        for file in folder.glob('*.*'):
            files[file.stem].append(file)
    return files

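# Hedged usage sketch for scan_folders_for_files: it expects pathlib.Path
# folders and groups hits by file stem, so data.csv and data.json end up
# under the same key. The temporary layout below is purely illustrative.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    folder = Path(tmp)
    (folder / 'data.csv').write_text('a,b\n')
    (folder / 'data.json').write_text('{}')
    found = scan_folders_for_files([folder])
    assert sorted(p.suffix for p in found['data']) == ['.csv', '.json']
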
def __init__(self, project_name):
    self.project_name = project_name
    self._was_warned = defaultdict(lambda: False)
    opts = self._get_database(opts=None)
    opts['connect'] = True
    opts.pop('type', None)
    logger.debug('establishing connection to the database')
    self.client = MongoClient(**opts)
    logger.debug(f'connection established {self.client}')
    logger.debug(f'connected to the mongo server {self}')

def in_edges(self):
    # workaround for a networkx bug when some edges have labels and some do not:
    in_edges = defaultdict(list)
    edge_modifiers = {}
    for f, t, *l in self.ontology.in_edges:
        # look out for is_not labels!
        for relation in l:
            assert 'not' not in relation
        in_edges[t].append(f)
        edge_modifiers[t, f] = l
    self.edge_modifiers = edge_modifiers
    return in_edges

def get_patients_with_old_format_tests(a_ds):
    apids = a_ds.field_by_name('patient_id')
    ahcts = a_ds.field_by_name('had_covid_test')
    atcps = a_ds.field_by_name('tested_covid_positive')
    auats = a_ds.field_by_name('updated_at')
    print('row count:', a_ds.row_count())
    apatients = defaultdict(OldFormatTestSummary)
    for i_r in range(a_ds.row_count()):
        if ahcts[i_r] == 'True' or atcps[i_r] in ('no', 'yes'):
            apatients[apids[i_r]].add(i_r, atcps[i_r])
    apatient_test_count = 0
    for k, v in apatients.items():
        if len(v.indices) > 0:
            apatient_test_count += 1
    return apatients

def competition(patches):
    # split into runoff return, patch type, slope position
    # subsets = [[], []]
    # # Filter on global flowlength
    # for patch in patches:
    #     if patch.model.runoff_return:
    #         subsets[0].append(patch)
    #     else:
    #         subsets[1].append(patch)
    subsets = [patches]

    new_subsets = []
    for subset in subsets:
        # Sort patches on flowlength (FL)
        subset.sort(key=lambda x: sum(cell.FL for cell in x.RL + x.BR))
        # Split category into RESOLUTION equally sized groups of patches
        new_subsets += np.array_split(subset, RESOLUTION)
    subsets = new_subsets

    new_subsets = []
    for subset in subsets:
        # Sort patches on slope position
        subset = list(subset)
        subset.sort(key=lambda x: sum(cell.pos[0] for cell in x.RL + x.BR))
        # Split category into RESOLUTION equally sized groups of patches
        new_subsets += np.array_split(subset, RESOLUTION)
    subsets = new_subsets

    intra_r, intra_b, inter = [], [], []
    for subset in subsets:
        subset_dict = defaultdict(list)
        for patch in subset:
            # Only sort on the type, not the size!
            subset_dict[patch.type[0]].append(patch)
        intra_r.append(subset_dict['R'])
        intra_b.append(subset_dict['B'])
        inter.append(subset_dict['M'])
    return intra_r, intra_b, inter

def create_from_tokens(
        cls, tokens: Iterable,
        maximum_vocabulary: Optional[int] = None) -> "TokenCodec":
    """
    Create a codec from a sequence of tokens.

    The vocabulary will consist of all the tokens. Token indexes are ordered
    by frequency, then by token order.

    :param tokens: sequence of tokens from which to build the codec
    :param maximum_vocabulary: optionally clip the vocabulary to this many of
        the most frequent tokens
    :return: a codec
    """
    token_count = defaultdict(int)
    for token in tokens:
        token = Token.create(token)
        token_count[token] += 1
    index_to_token = [
        token for _, token in sorted(
            ((count, token) for token, count in token_count.items()),
            key=lambda t: (-t[0], t[1]))[:maximum_vocabulary]
    ]
    return cls(index_to_token)

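# The sort key above orders the vocabulary by descending count and then by
# token, so ties break deterministically; clipping with [:maximum_vocabulary]
# relies on list[:None] returning the whole list. Toy counts for illustration:
token_count = {'b': 2, 'a': 2, 'c': 5}
index_to_token = [
    token for _, token in sorted(
        ((count, token) for token, count in token_count.items()),
        key=lambda t: (-t[0], t[1]))[:None]
]
assert index_to_token == ['c', 'a', 'b']
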
def filter_duplicate_new_tests(ds, patients, threshold_for_diagnostic_print=1000000):
    tids = ds.field_by_name('id')
    pids = ds.field_by_name('patient_id')
    cats = ds.field_by_name('created_at')
    edates = ds.field_by_name('date_taken_specific')
    edate_los = ds.field_by_name('date_taken_between_start')
    edate_his = ds.field_by_name('date_taken_between_end')
    cleaned_patients = defaultdict(TestIndices)
    for p in patients.items():
        # print(p[0], len(p[1].indices))
        test_dates = set()
        for i_r in reversed(p[1].indices):
            test_dates.add((edates[i_r], edate_los[i_r], edate_his[i_r]))
        if len(test_dates) == 1:
            istart = p[1].indices[-1]
            # utils.print_diagnostic_row(f"{istart}", ds, istart, ds.names_)
            cleaned_patients[p[0]].add(istart)
        else:
            cleaned_entries = dict()
            for t in test_dates:
                cleaned_entries[t] = list()
            for i_r in reversed(p[1].indices):
                cleaned_entries[(edates[i_r], edate_los[i_r], edate_his[i_r])].append(i_r)
            if len(test_dates) > threshold_for_diagnostic_print:
                print(p[0])
            for e in sorted(cleaned_entries.items(), key=lambda x: x[0]):
                last_index = e[1][0]
                if len(test_dates) > threshold_for_diagnostic_print:
                    utils.print_diagnostic_row(f"{p[0]}/{last_index}", ds,
                                               last_index, ds.names_)
                cleaned_patients[p[0]].add(last_index)
    return cleaned_patients

class StatsHolder:
    log: 'Logger'
    lastUpdate: datetime
    # user and game events:
    #   User logs in
    #   User logs out
    #   User joins game
    #   User leaves game
    connectTimes: List[float] = [0]
    userEvents = WeakList()
    userEventsView: Dict[str, List[UserEvent]] = defaultdict(list)

    def __init__(self, main: 'WesBot', log) -> None:
        self.main = main
        self.log = log
        self.lastUpdate = datetime.datetime.now()

    def addConnectTime(self) -> None:
        if self.connectTimes[0] < 0.0001:
            self.connectTimes.clear()
        self.connectTimes.append(time.time())

    def getTimeSinceFirstConnect(self) -> str:
        return str(
            datetime.timedelta(seconds=time.time() - self.connectTimes[0]))

    def getTimeSinceLastConnect(self):
        return str(
            datetime.timedelta(seconds=time.time() - self.connectTimes[-1]))

    def onUserRemove(self, name, comment=""):
        if comment is None:
            return
        e = UserEvent(time.time(), "-", name, comment)
        self.userEventsView[name].append(e)
        self.userEvents.append(e)

    def onUserAdd(self, name, comment=""):
        if comment is None:
            return
        e = UserEvent(time.time(), "+", name, comment)
        self.userEventsView[name].append(e)
        self.userEvents.append(e)

    def getUserStats(self, name: str) -> str:
        name = name.strip()
        self.loadUserEvents(name)
        if name not in self.userEventsView:
            return "No stats found for '{}'".format(name)
        d = self.userEventsView[name]
        totalUptime = 0
        lastUptime = 0
        firstJoin = 0
        latestJoin = 0
        lastSeen = 0
        count = 0
        # 0 -> user is offline
        # 1 -> user is online
        # 2 -> user is added, and immediately deleted
        for e in d:
            if e.event == "+":
                count += 1
                if count == 2:
                    continue
                if firstJoin == 0:
                    firstJoin = e.time
                if count == 1:
                    latestJoin = e.time
            if e.event == "-":
                count -= 1
                if e.comment == "onQuit":
                    # Everyone is considered offline while bot is offline
                    count = 0
                if count == 1:
                    continue
                if count == 0:
                    lastUptime = e.time - latestJoin
                    totalUptime += lastUptime
                    lastSeen = e.time
        if count == 1:
            lastSeen = time.time()
            lastUptime = lastSeen - latestJoin
            totalUptime += lastUptime
        return "{} has been online for {}, in last session {}. Last seen online: {}".format(
            name,
            str(datetime.timedelta(seconds=totalUptime)),
            str(datetime.timedelta(seconds=lastUptime)),
            datetime.datetime.fromtimestamp(lastSeen).strftime('%d.%m %H:%M:%S'))

    def getLastSeenTime(self, name: str) -> datetime:
        lastSeen = 0
        name = name.strip()
        if name not in self.userEventsView:
            self.log.debug("No stats for user {}".format(name))
            return datetime.datetime.fromtimestamp(lastSeen)
        d = self.userEventsView[name]
        # TODO save user updates with different comment than login and logout
        # Last event time, and for online users current time
        if len(d) > 0:
            lastSeen = d[-1].time
        if name in self.main.lobby.users.getOnlineNames():
            lastSeen = time.time()
        fromtimestamp = datetime.datetime.fromtimestamp(lastSeen)
        self.log.log(
            4, "Found last seen time {} for {}".format(fromtimestamp, name))
        return fromtimestamp

    def onQuit(self):
        self.log.debug("Removing all online users on quit")
        self.logStats()
        u: User
        for u in self.main.lobby.users.getOnlineUsers():
            self.onUserRemove(u.name, "onQuit")
        self.saveUsers()

    def tick(self):
        now: datetime = datetime.datetime.now()
        if now - self.lastUpdate > datetime.timedelta(hours=1):
            self.logStats()
            self.deleteOldData(now)
            self.lastUpdate = now

    def logStats(self):
        self.log.info("Game stats: {}, User stats: {}".format(
            self.main.lobby.games.getStats(),
            self.main.lobby.users.getStats()))

    def deleteOldData(
            self, now: datetime,
            deletionTime: datetime.timedelta = datetime.timedelta(hours=2)):
        self.log.debug("userEvents count before archive {}".format(
            len(self.userEvents)))
        self.saveUsers()
        deletableNames = set()
        for name in self.userEventsView:
            last_seen_time: datetime = self.getLastSeenTime(name)
            time_since_seen: datetime.timedelta = now - last_seen_time
            self.log.log(
                5, "User {} time since seen {}, time seen {}".format(
                    name, time_since_seen, last_seen_time))
            if time_since_seen > deletionTime:
                deletableNames.add(name)
        for name in deletableNames:
            del self.userEventsView[name]
            self.log.debug("Deleted {}".format(name))
        self.log.debug("userEvents count after archive {}".format(
            len(self.userEvents)))

    def saveUsers(self):
        for name in self.userEventsView:
            unsavedSince = 0
            filename = "user_events/{}/{}.log".format(name, name)
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            if os.path.isfile(filename):
                with open(filename, "r", encoding="utf8") as f:
                    lines = f.readlines()
                if len(lines) > 0:
                    e = UserEvent.fromJSON(lines[-1])
                    unsavedSince = e.time
            with open(filename, "a", encoding="utf8") as f:
                for e in self.userEventsView[name]:
                    if e.time > unsavedSince + 0.0001:
                        f.write(e.toJSON() + "\n")
            # TODO new file when it is too large
            # if os.path.getsize(filename) > 1 * 1000 * 1000:
            #     pass
        self.main.log.debug("Users saved")

    def loadUserEvents(self, name):
        filename = "user_events/{}/{}.log".format(name, name)
        if not os.path.isfile(filename):
            return
        self.log.debug("Loading user events for {}".format(name))
        loadedEvents = []
        firstAvailableTime = time.time()
        if len(self.userEventsView[name]) > 0:
            firstAvailableTime = self.userEventsView[name][0].time
        with open(filename, "r", encoding="utf8") as f:
            for line in f:
                e = UserEvent.fromJSON(line)
                if e.time < firstAvailableTime:
                    loadedEvents.append(e)
        self.log.debug("Loaded {} user events for {}".format(
            len(loadedEvents), name))
        if len(loadedEvents) == 0:
            return
        self.userEventsView[name].extend(loadedEvents)
        self.log.debug("First extend")
        self.userEventsView[name].sort(key=lambda e: e.time)
        self.log.debug("First sort")
        self.userEvents.extend(loadedEvents)
        self.log.debug("Second extend")
        self.userEvents.sort(key=lambda e: e.time)
        self.log.debug("Second sort")

def __init__(self):
    self.params = None
    self.model = None
    self.epoch_counter = defaultdict(int)
    self.batch_counter = defaultdict(int)

def __init__(self, average_stats_pqueue: PriorityQueue, time_period):
    self.__avg_stats_queue = average_stats_pqueue
    self.__time_period = time_period
    self.__trx_per_sec = defaultdict(int)
    self.__current_second = None

def convert_to_layer_nodes(root):
    """
    At each level in the SPN rooted in the 'root' node, model all the nodes
    as a single layer-node.

    Args:
        root (Node): The root of the SPN graph.

    Returns:
        root (Node): The root of the SPN graph, with each layer modelled as a
            single layer-node.
    """
    parents = defaultdict(list)
    depths = defaultdict(list)
    node_to_depth = OrderedDict()
    node_to_depth[root] = 1

    def get_parents(node):
        # Add to parents dict
        if node.is_op:
            for i in node.inputs:
                if (i and  # Input not empty
                        not (i.is_param or i.is_var)):
                    parents[i.node].append(node)
                    node_to_depth[i.node] = node_to_depth[node] + 1

    def permute_inputs(input_values, input_sizes):
        # For a given list of inputs and their corresponding sizes, create a
        # nested list of (input, index) pairs.
        # E.g: input_values = [(A, [2, 5]), (B, None)]
        #      input_sizes = [2, 3]
        #      inputs = [[('A', 2), ('A', 5)],
        #                [('B', 0), ('B', 1), ('B', 2)]]
        inputs = [
            list(product([inp.node], inp.indices)) if inp and inp.indices
            else list(product([inp.node], list(range(inp_size))))
            for inp, inp_size in zip(input_values, input_sizes)
        ]
        # For a given nested list of (input, index) pairs, permute over the inputs
        # E.g: permuted_inputs = [('A', 2), ('B', 0),
        #                         ('A', 2), ('B', 1),
        #                         ('A', 2), ('B', 2),
        #                         ('A', 5), ('B', 0),
        #                         ('A', 5), ('B', 1),
        #                         ('A', 5), ('B', 2)]
        permuted_inputs = list(product(*[inps for inps in inputs]))
        return list(chain(*permuted_inputs))

    # Create a parents dictionary of the SPN graph
    traverse_graph(root, fun=get_parents, skip_params=True)

    # Create a depth dictionary of the SPN graph
    for key, value in node_to_depth.items():
        depths[value].append(key)
    spn_depth = len(depths)

    # Iterate through each depth of the SPN, starting from the deepest layer,
    # moving up to the root node
    for depth in range(spn_depth, 1, -1):
        if isinstance(depths[depth][0], (Sum, ParallelSums)):  # A Sums Layer
            # Create a default SumsLayer node
            with tf.name_scope("Layer%s" % depth):
                sums_layer = SumsLayer(name="SumsLayer-%s.%s" % (depth, 1))
            # Initialize a counter for keeping track of number of sums
            # modelled in the layer node
            layer_num_sums = 0
            # Initialize an empty list for storing sum-input-sizes of sums
            # modelled in the layer node
            num_or_size_sums = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # TODO: To be replaced with node.num_sums once AbstractSums
                # class is introduced
                # No. of sums modelled by the current node
                node_num_sums = (1 if isinstance(node, Sum) else node.num_sums)
                # Add input values of the current node to the SumsLayer node
                sums_layer.add_values(*node.values * node_num_sums)
                # Add sum-input-size, of each sum modelled in the current
                # node, to the list
                num_or_size_sums += [sum(node.get_input_sizes()[2:])] * node_num_sums
                # Visit each parent of the current node
                for parent in parents[node]:
                    try:
                        # 'values' in case parent is an Op node
                        values = list(parent.values)
                    except AttributeError:
                        # 'inputs' in case parent is a Concat node
                        values = list(parent.inputs)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-sums of the
                                # layer-op as offset
                                indices = (np.asarray(value.indices) +
                                           layer_num_sums).tolist()
                            else:
                                # If not, then create a list accordingly
                                indices = list(range(layer_num_sums,
                                                     (layer_num_sums + node_num_sums)))
                            # Replace previous (node) input value in the
                            # current parent node with the new layer-node value
                            values[i] = (sums_layer, indices)
                            # Once child-node found, don't have to search further
                            break
                    # Reset values of the current parent node, by including
                    # the new child (layer-node)
                    try:
                        # set 'values' in case parent is an Op node
                        parent.set_values(*values)
                    except AttributeError:
                        # set 'inputs' in case parent is a Concat node
                        parent.set_inputs(*values)
                # Increment num-sums-counter of the layer-node
                layer_num_sums += node_num_sums
                # Disconnect
                node.disconnect_inputs()
            # After all nodes at a certain depth are modelled into a layer-node,
            # set num-sums parameter accordingly
            sums_layer.set_sum_sizes(num_or_size_sums)
        elif isinstance(depths[depth][0], (Product, PermuteProducts)):
            # A Products Layer
            with tf.name_scope("Layer%s" % depth):
                prods_layer = ProductsLayer(name="ProductsLayer-%s.%s" % (depth, 1))
            # Initialize a counter for keeping track of number of prods
            # modelled in the layer node
            layer_num_prods = 0
            # Initialize an empty list for storing prod-input-sizes of prods
            # modelled in the layer node
            num_or_size_prods = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # Get input values and sizes of the product node
                input_values = list(node.values)
                input_sizes = list(node.get_input_sizes())
                if isinstance(node, PermuteProducts):
                    # Permute over input-values to model permuted products
                    input_values = permute_inputs(input_values, input_sizes)
                    node_num_prods = node.num_prods
                    prod_input_size = len(input_values) // node_num_prods
                elif isinstance(node, Product):
                    node_num_prods = 1
                    prod_input_size = int(sum(input_sizes))
                # Add input values of the current node to the ProductsLayer node
                prods_layer.add_values(*input_values)
                # Add prod-input-size, of each product modelled in the current
                # node, to the list
                num_or_size_prods += [prod_input_size] * node_num_prods
                # Visit each parent of the current node
                for parent in parents[node]:
                    values = list(parent.values)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-prods of the
                                # layer-op as offset
                                indices = value.indices + layer_num_prods
                            else:
                                # If not, then create a list accordingly
                                indices = list(range(layer_num_prods,
                                                     (layer_num_prods + node_num_prods)))
                            # Replace previous (node) input value in the
                            # current parent node with the new layer-node value
                            values[i] = (prods_layer, indices)
                    # Reset values of the current parent node, by including
                    # the new child (layer-node)
                    parent.set_values(*values)
                # Increment num-prods-counter of the layer node
                layer_num_prods += node_num_prods
                # Disconnect
                node.disconnect_inputs()
            # After all nodes at a certain depth are modelled into a layer-node,
            # set num-prods parameter accordingly
            prods_layer.set_prod_sizes(num_or_size_prods)
        elif isinstance(depths[depth][0], (SumsLayer, ProductsLayer, Concat)):
            # A Concat node
            pass
        else:
            raise StructureError("Unknown node-type: {}".format(depths[depth][0]))

    return root

def proba_motion_analysis(data_x_gaia, motions=None, x_lim=(0.3, 1), step=0.004,
                          mean_y_lines=None):
    motions = ['parallax'] if motions is None else motions
    mu_dict, sigma_dict, median_dict, error_dict = (
        defaultdict(list), defaultdict(list), defaultdict(list), defaultdict(list))

    # Get QSOs
    qso_x_gaia = data_x_gaia.loc[data_x_gaia['CLASS_PHOTO'] == 'QSO']

    # Limit QSOs to proba thresholds
    thresholds = np.arange(x_lim[0], x_lim[1], step)
    for thr in thresholds:
        qso_x_gaia_limited = qso_x_gaia.loc[qso_x_gaia['QSO_PHOTO'] >= thr]
        for motion in motions:
            # Get stats
            (mu, sigma) = stats.norm.fit(qso_x_gaia_limited[motion])
            median = np.median(qso_x_gaia_limited[motion])
            error = sigma / math.sqrt(qso_x_gaia_limited.shape[0])
            # Store values
            mu_dict[motion].append(mu)
            sigma_dict[motion].append(sigma)
            median_dict[motion].append(median)
            error_dict[motion].append(error)

    # Plot statistics
    to_plot = [((mu_dict, error_dict), 'mean'), (sigma_dict, 'sigma'),
               (median_dict, 'median')]
    color_palette = get_cubehelix_palette(len(motions))
    for t in to_plot:
        plt.figure()
        label = None
        for i, motion in enumerate(motions):
            if len(motions) != 1:
                label = motion
            if t[1] == 'mean':
                vals = t[0][0][motion]
                errors = t[0][1][motion]
            else:
                vals = t[0][motion]
                errors = None
            plt.plot(thresholds, vals, label=label, color=color_palette[i],
                     linestyle=get_line_style(i))
            ax = plt.gca()
            if errors:
                lower = np.array(vals) - np.array(errors) / 2
                upper = np.array(vals) + np.array(errors) / 2
                ax.fill_between(thresholds, lower, upper,
                                color=color_palette[i], alpha=0.2)
        if t[1] == 'mean' and mean_y_lines is not None:
            x_lim = ax.get_xlim()
            thr_x_lim = np.arange(x_lim[0], x_lim[1] + 0.01, 0.01)
            for line_name, y, y_err in mean_y_lines:
                plt.axhline(y, linestyle='--', color='b')
                ax.fill_between(thr_x_lim, y - y_err / 2, y + y_err / 2,
                                color='b', alpha=0.2)
                plt.text(
                    thresholds[0] + 0.01 * abs(max(thresholds) - min(thresholds)),
                    y + 0.06 * abs(max(vals) - min(vals)),
                    line_name)
            ax.set_xlim(x_lim)
        plt.xlabel('minimum classification probability')
        plt.ylabel('{} parallax {}'.format(t[1], '[mas]'))
        if label:
            plt.legend(framealpha=1.0)

def __init__(self):
    super().__init__()
    self.operations: DefaultDict[UpdateOperation, List[str]] = defaultdict(list)

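# Sketch of the operations map above: grouping a stream of (operation, field)
# pairs under defaultdict(list). The enum below is a hypothetical stand-in
# for UpdateOperation.
from collections import defaultdict
from enum import Enum

class Op(Enum):
    SET = 'set'
    UNSET = 'unset'

operations = defaultdict(list)
for op, field in [(Op.SET, 'name'), (Op.SET, 'age'), (Op.UNSET, 'tmp')]:
    operations[op].append(field)
assert operations[Op.SET] == ['name', 'age']
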
def featurize(self, struct, idx):
    """
    Get crystal fingerprint of site with given index in input structure.

    Args:
        struct (Structure): Pymatgen Structure object.
        idx (int): index of target site in structure.

    Returns:
        list of weighted order parameters of target site.
    """
    # dict where key = CN, value is an array that contains each OP for that CN
    cn_fingerprint_array = defaultdict(list)
    total_weight = math.pi / 4  # 1/4 unit circle area

    target = None
    if self.cation_anion:
        target = []
        m_oxi = struct[idx].specie.oxi_state
        for site in struct:
            if site.specie.oxi_state * m_oxi <= 0:  # opposite charge
                target.append(site.specie)
        if not target:
            raise ValueError(
                "No valid targets for site within cation_anion constraint!")

    # Use a Voronoi tessellation to identify neighbors of this site
    vnn = VoronoiNN(cutoff=self.cutoff_radius, targets=target)
    n_w = get_nearest_neighbors(vnn, struct, idx)

    # Convert nn info to just a dict of neighbor -> weight
    n_w = dict((x['site'], x['weight']) for x in n_w)

    dist_sorted = sorted(n_w.values(), reverse=True)

    if self.override_cn1:
        cn1 = 1
        for d in dist_sorted[1:]:
            cn1 = cn1 * (dist_sorted[0] ** 2 - d ** 2) / dist_sorted[0] ** 2
        cn_fingerprint_array[1] = [round(cn1, 6)]
        dist_sorted[0] = dist_sorted[1]

    dist_norm = [d / dist_sorted[0] for d in dist_sorted if d > 0]

    dist_bins = []  # bin numerical tolerances (~error bar of measurement)
    for d in dist_norm:
        if not dist_bins or (d > self.tol and dist_bins[-1] / (1 + self.tol) > d):
            dist_bins.append(d)

    for dist_idx, dist in enumerate(dist_bins):
        neigh_sites = [n for n, w in n_w.items()
                       if w > 0 and w / dist_sorted[0] >= dist / (1 + self.tol)]
        cn = len(neigh_sites)
        if cn in self.ops:
            for opidx, op in enumerate(self.ops[cn]):
                if self.optypes[cn][opidx] == "wt":
                    opval = 1
                else:
                    opval = op.get_order_parameters(
                        [struct[idx]] + neigh_sites, 0,
                        indices_neighs=[i for i in range(1, len(neigh_sites) + 1)])[0]
                opval = opval or 0  # handles None

                # figure out the weight for this opval based on the
                # semicircle integration method
                x1 = 1 - dist
                x2 = 1 if dist_idx == len(dist_bins) - 1 else 1 - dist_bins[dist_idx + 1]
                weight = self._semicircle_integral(x2) - self._semicircle_integral(x1)

                opval = opval * weight / total_weight
                cn_fingerprint_array[cn].append(opval)

    # convert dict to list
    cn_fingerprint = []
    for cn in sorted(self.optypes):
        for op_idx, _ in enumerate(self.optypes[cn]):
            try:
                cn_fingerprint.append(cn_fingerprint_array[cn][op_idx])
            except IndexError:  # no OP value computed
                cn_fingerprint.append(0)
    return cn_fingerprint

def __init__(self, defaults: Sequence[Tuple[Union[str, Tuple], object]],
             seconds=60,
             callables: List[DeltaExecutable] = (
                 ('quantity', lambda x: len(x)),
                 ('volume_total', lambda x: sum(x))),
             starting_moment: datetime.datetime = None, **kwargs):
    super().__init__(f'delta-{seconds}', defaults, callables, seconds,
                     starting_moment, **kwargs)
    self._time_storage = defaultdict(deque)