def muster_basis_options(self):
    text = ""
    lowername = self.method.lower()
    options = defaultdict(lambda: defaultdict(dict))

    options["BASIS"]["ORBITAL"]["value"] = self.basis

    if self.method in ["ccsd(t)-f12-optri"]:
        if self.basis == "cc-pvdz-f12":
            options["BASIS"]["JKFIT"]["value"] = "aug-cc-pvtz/jkfit"
            options["BASIS"]["JKFITC"]["value"] = self.basis + "/optri"
            options["BASIS"]["MP2FIT"]["value"] = "aug-cc-pvtz/mp2fit"
    elif (("df-" in self.method) or ("f12" in self.method) or
          (self.method in ["mp2c", "dft-sapt", "dft-sapt-pbe0acalda"])):
        if self.unaugbasis and self.auxbasis:
            options["BASIS"]["JKFIT"]["value"] = self.auxbasis + "/jkfit"
            options["BASIS"]["JKFITB"]["value"] = self.unaugbasis + "/jkfit"
            options["BASIS"]["MP2FIT"]["value"] = self.auxbasis + "/mp2fit"
            options["BASIS"]["DFLHF"]["value"] = self.auxbasis + "/jkfit"
        else:
            raise ValidationError("""Auxiliary basis not predictable from orbital basis '%s'""" % (self.basis))

    return text, options
def __init__(self):
    self.log = SimLog("cocotb.scheduler")
    if _debug:
        self.log.setLevel(logging.DEBUG)

    # A dictionary of pending coroutines for each trigger,
    # indexed by trigger
    self._trigger2coros = collections.defaultdict(list)

    # A dictionary of pending triggers for each coroutine, indexed by coro
    self._coro2triggers = collections.defaultdict(list)

    # Our main state
    self._mode = Scheduler._MODE_NORMAL

    # A dictionary of pending writes
    self._writes = {}

    self._pending_coros = []
    self._pending_callbacks = []
    self._pending_triggers = []
    self._pending_threads = []
    self._pending_events = []  # Events we need to call set on once we've unwound

    self._terminate = False
    self._test_result = None
    self._entrypoint = None
    self._main_thread = threading.current_thread()

    # Select the appropriate scheduling algorithm for this simulator
    self.advance = self.default_scheduling_algorithm
    self._is_reacting = False
def get_context_data(self, **kwargs):
    context = super(BugmailStatsView, self).get_context_data(**kwargs)
    json_stats = cache.get(self.cache_key)
    if not json_stats:
        wks_ago = (now() - timedelta(days=14)).date()
        stats = BugmailStat.objects.stats_for_range(wks_ago)
        stats_dict = {
            BugmailStat.TOTAL: defaultdict(int),
            BugmailStat.USED: defaultdict(int),
        }
        for s in stats:
            stats_dict[s.stat_type][date_to_js(s.date)] += s.count
        all_stats = {
            'total': [],
            'used': [],
            'x_axis': [],
        }
        stats_total = stats_dict[BugmailStat.TOTAL]
        stats_used = stats_dict[BugmailStat.USED]
        for d in date_range(wks_ago):
            d = date_to_js(d)
            all_stats['x_axis'].append(d)
            all_stats['total'].append([d, stats_total[d]])
            all_stats['used'].append([d, stats_used[d]])
        json_stats = json.dumps(all_stats)
        cache.set(self.cache_key, json_stats, 1800)  # 30 minutes
    context['stats'] = json_stats
    return context
def bench_b(power_list):
    n_samples, n_features = 1000, 10000
    data_params = {'n_samples': n_samples, 'n_features': n_features,
                   'tail_strength': .7, 'random_state': random_state}
    dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
    ranks = [10, 50, 100]

    if enable_spectral_norm:
        all_spectral = defaultdict(list)
    all_frobenius = defaultdict(list)
    for rank in ranks:
        X = make_low_rank_matrix(effective_rank=rank, **data_params)
        if enable_spectral_norm:
            X_spectral_norm = norm_diff(X, norm=2, msg=False)
        X_fro_norm = norm_diff(X, norm='fro', msg=False)

        for n_comp in [int(rank / 2), rank, rank * 2]:  # np.int is a deprecated alias for int
            label = "rank=%d, n_comp=%d" % (rank, n_comp)
            print(label)
            for pi in power_list:
                U, s, V, _ = svd_timing(X, n_comp, n_iter=pi, n_oversamples=2,
                                        power_iteration_normalizer='LU')
                if enable_spectral_norm:
                    A = U.dot(np.diag(s).dot(V))
                    all_spectral[label].append(norm_diff(X - A, norm=2) /
                                               X_spectral_norm)
                f = scalable_frobenius_norm_discrepancy(X, U, s, V)
                all_frobenius[label].append(f / X_fro_norm)

    if enable_spectral_norm:
        title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
        plot_power_iter_vs_s(power_list, all_spectral, title)  # was `power_iter`, undefined here
    title = "%s: frobenius norm diff vs n power iteration" % (dataset_name)
    plot_power_iter_vs_s(power_list, all_frobenius, title)
def default_scheduling_algorithm(self):
    """
    Decide whether we need to schedule our own triggers (if at all) in
    order to progress to the next mode.

    This algorithm has been tested against the following simulators:
        Icarus Verilog
    """
    if not self._terminate and self._writes:
        if self._mode == Scheduler._MODE_NORMAL:
            if not self._readwrite.primed:
                self._readwrite.prime(self.react)
        elif not self._next_timestep.primed:
            self._next_timestep.prime(self.react)
    elif self._terminate:
        if _debug:
            self.log.debug("Test terminating, scheduling Timer")

        for t in self._trigger2coros:
            t.unprime()

        for t in [self._readwrite, self._readonly, self._next_timestep,
                  self._timer1, self._timer0]:
            if t.primed:
                t.unprime()

        self._timer1.prime(self.begin_test)
        self._trigger2coros = collections.defaultdict(list)
        self._coro2triggers = collections.defaultdict(list)
        self._terminate = False
        self._mode = Scheduler._MODE_TERM
def getTrainingContextData():
    training_data = OrderedDict()
    # Initialising the xml parser for the training and test set
    training_root = initializeXMLParser(dir_path + training_file)
    # Grabbing one word type at a time
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        training_data[word_type] = defaultdict(lambda: defaultdict(dict))
        # Grabbing the instance id and its list of senses
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']
            senses = [answer.attrib['senseid'] for answer in word_instance.findall('answer')]
            pre_context = word_instance.find('context').text.split()
            post_context = word_instance.find('context').find('head').tail.split()
            # Pre-processing the pre-context and post context
            # TODO: Check why this is reducing the accuracy of the model by 1%
            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)
            training_data[word_type]['training'][instance] = {"Sense": senses,
                                                              "Pre-Context": pre_context,
                                                              "Post-Context": post_context}
        # break  # TODO: Remove this breakpoint. Only testing for one word type right now
    return training_data
def as_coefficients_dict(a):
    """Return a dictionary mapping terms to their Rational coefficient.

    Since the dictionary is a defaultdict, inquiries about terms which
    were not present will return a coefficient of 0. If an expression is
    not an Add it is considered to have a single term.

    Examples
    ========

    >>> from sympy.abc import a, x
    >>> (3*x + a*x + 4).as_coefficients_dict()
    {1: 4, x: 3, a*x: 1}
    >>> _[a]
    0
    >>> (3*a*x).as_coefficients_dict()
    {a*x: 3}
    """
    d = defaultdict(list)
    for ai in a.args:
        c, m = ai.as_coeff_Mul()
        d[m].append(c)
    for k, v in d.iteritems():
        if len(v) == 1:
            d[k] = v[0]
        else:
            d[k] = Add(*v)
    di = defaultdict(int)
    di.update(d)
    return di
def __init__(self, max_n):
    """
    max_n must be greater than or equal to 2.
    """
    self._max_n = max_n

    # Maps {n: {ngram_prefix: word_counts}}
    # ngram_prefix is a tuple of words.
    # word_counts is a Counter of word to count.
    self._ngram_word_counts_map = {}
    # Used to calculate the continuation counts.
    # For each n, maps a word to a set of ngram_prefix that precede it.
    # Maps {n: {word: set(ngram_prefix)}}
    self._continuations_map = {}
    # Used to normalize continuation counts into a probability.
    # Maps {n: set(ngram)}
    self._ngrams_map = {}
    # Maps {order: discount}
    # TODO(dounanshi): calculate discount http://www.riacs.edu/research/technical_reports/TR_pdf/TR_00.07.pdf
    self._discount_map = {1: .75, 2: .75, 3: .75}

    # Initialize maps.
    for i in range(max_n):
        n = i + 1
        self._ngram_word_counts_map[n] = defaultdict(Counter)
        self._continuations_map[n] = defaultdict(set)
        self._ngrams_map[n] = set()

    # Maps {ngram_prefix: count}
    self._prefix_count_cache = {}
    # Maps {ngram_prefix: (n1, n2, n3)}
    self._nvals_cache = {}
def run_merge(filenames):
    """Merges all Skype databases to a new database."""
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    db_base = dbs.pop()
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    postbacks = Queue.Queue()
    postfunc = lambda r: postbacks.put(r)
    worker = workers.MergeThread(postfunc)

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    filename_final = util.unique_path("%s.merged.%s%s" % (name, now, ext))
    print("Creating %s, using %s as base." % (filename_final, db_base))
    shutil.copyfile(db_base.filename, filename_final)
    db2 = skypedata.SkypeDatabase(filename_final)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    for db1 in dbs:
        chats = db1.get_conversations()
        db1.get_conversations_stats(chats)
        bar_total = sum(c["message_count"] for c in chats)
        bar_text = " Processing %.*s.." % (30, db1)
        bar = ProgressBar(max=bar_total, afterword=bar_text)
        bar.start()
        args = {"db1": db1, "db2": db2, "chats": chats,
                "type": "diff_merge_left"}
        worker.work(args)
        while True:
            result = postbacks.get()
            if "error" in result:
                print("Error merging %s:\n\n%s" % (db1, result["error"]))
                worker = None  # Signal for global break
                break  # break while True
            if "done" in result:
                break  # break while True
            if "diff" in result:
                counts[db1]["chats"] += 1
                counts[db1]["msgs"] += len(result["diff"]["messages"])
                msgcounts = sum(c["message_count"] for c in result["chats"])
                bar.update(bar.value + msgcounts)
            if result["output"]:
                log(result["output"])
        if not worker:
            break  # break for db1 in dbs
        bar.stop()
        bar.afterword = " Processed %s." % db1
        bar.update(bar_total)
        print

    if not counts:
        print("Nothing new to merge.")
        db2.close()
        os.unlink(filename_final)
    else:
        for db1 in dbs:
            print("Merged %s in %s from %s." %
                  (util.plural("message", counts[db1]["msgs"]),
                   util.plural("chat", counts[db1]["chats"]), db1))
        print("Merge into %s complete." % db2)
def APMTracker(replay):
    """
    Builds ``player.aps`` and ``player.apm`` dictionaries where an action is
    any Selection, Hotkey, or Ability event.

    Also provides ``player.avg_apm`` which is defined as the sum of all the
    above actions divided by the number of seconds played by the player (not
    necessarily the whole game) multiplied by 60.
    """
    for player in replay.players:
        player.aps = defaultdict(int)
        player.apm = defaultdict(int)
        player.seconds_played = replay.length.seconds

        for event in player.events:
            if event.name == 'SelectionEvent' or 'AbilityEvent' in event.name or 'ControlGroup' in event.name:
                player.aps[event.second] += 1
                player.apm[int(event.second / 60)] += 1
            elif event.name == 'PlayerLeaveEvent':
                player.seconds_played = event.second

        if len(player.apm) > 0:
            player.avg_apm = sum(player.aps.values()) / float(player.seconds_played) * 60
        else:
            player.avg_apm = 0

    return replay
def _getavailablepackfiles(self):
    """For each pack file (a index/data file combo), yields:
      (full path without extension, mtime, size)

    mtime will be the mtime of the index/data file (whichever is newer)
    size is the combined size of index/data file
    """
    indexsuffixlen = len(self.INDEXSUFFIX)
    packsuffixlen = len(self.PACKSUFFIX)

    ids = set()
    sizes = defaultdict(lambda: 0)
    mtimes = defaultdict(lambda: [])
    try:
        for filename, type, stat in osutil.listdir(self.path, stat=True):
            id = None
            if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                id = filename[:-indexsuffixlen]
            elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                id = filename[:-packsuffixlen]

            # Since we expect to have two files corresponding to each ID
            # (the index file and the pack file), we can yield once we see
            # it twice.
            if id:
                sizes[id] += stat.st_size  # Sum both files' sizes together
                mtimes[id].append(stat.st_mtime)
                if id in ids:
                    yield (os.path.join(self.path, id), max(mtimes[id]), sizes[id])
                else:
                    ids.add(id)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
def worker_list(self, include_running=True, **kwargs):
    self.prune()
    workers = [
        dict(
            name=worker.id,
            last_active=worker.last_active,
            started=getattr(worker, 'started', None),
            **worker.info
        ) for worker in self._state.get_active_workers()]
    workers.sort(key=lambda worker: worker['started'], reverse=True)
    if include_running:
        running = collections.defaultdict(dict)
        num_pending = collections.defaultdict(int)
        num_uniques = collections.defaultdict(int)
        for task in self._state.get_pending_tasks():
            if task.status == RUNNING and task.worker_running:
                running[task.worker_running][task.id] = self._serialize_task(task.id, False)
            elif task.status == PENDING:
                for worker in task.workers:
                    num_pending[worker] += 1
                if len(task.workers) == 1:
                    num_uniques[list(task.workers)[0]] += 1
        for worker in workers:
            tasks = running[worker['name']]
            worker['num_running'] = len(tasks)
            worker['num_pending'] = num_pending[worker['name']]
            worker['num_uniques'] = num_uniques[worker['name']]
            worker['running'] = tasks
    return workers
def extract_classifiers(self, das, utterances, verbose=False):
    # process the training data
    self.utterances = utterances
    self.das = das

    self.utterances_list = self.utterances.keys()

    self.utterance_fvc = {}
    self.das_abstracted = {}
    self.das_category_labels = {}
    for utt_idx in self.utterances_list:
        self.utterances[utt_idx] = self.preprocessing.normalise(self.utterances[utt_idx])
        self.utterance_fvc[utt_idx] = self.get_fvc(self.utterances[utt_idx])
        self.das_abstracted[utt_idx], self.das_category_labels[utt_idx] = \
            self.get_abstract_da(self.das[utt_idx], self.utterance_fvc[utt_idx])

    # get the classifiers
    self.classifiers = defaultdict(int)
    for k in self.utterances_list:
        for dai in self.das_abstracted[k].dais:
            self.classifiers[unicode(dai)] += 1

            if verbose:
                if dai.value and 'CL_' not in dai.value:
                    print '=' * 120
                    print 'Un-abstracted category label value'
                    print '-' * 120
                    print unicode(self.utterances[k])
                    print unicode(self.utterance_fvc[k])
                    print unicode(self.das[k])
                    print unicode(self.das_abstracted[k])
def prune_features(self, clser, min_pos_feature_count, min_neg_feature_count, verbose=False):
    if verbose:
        print 'Pruning the features'
        print

    features_counts = defaultdict(int)
    for feat in self.classifiers_features[clser]:
        for f in feat:
            features_counts[f] += 1

    if verbose:
        print " Number of features: ", len(features_counts)

    features_counts = defaultdict(lambda: [0, 0])
    for feat, output in zip(self.classifiers_features[clser], self.classifiers_outputs[clser]):
        output = 0 if output < 0.5 else 1
        for f in feat:
            features_counts[f][output] += 1

    remove_features = []
    for f in features_counts:
        negative, positive = features_counts[f]

        if positive >= min_pos_feature_count + len(f):
            # keep it
            continue

        if negative >= min_neg_feature_count + len(f):
            # keep it
            continue

        # remove the feature since it does not meet the criteria
        remove_features.append(f)

    if verbose:
        print " Number of features occurring less than %d positive times and %d negative times: %d" % \
              (min_pos_feature_count, min_neg_feature_count, len(remove_features))

    remove_features = set(remove_features)
    for feat in self.classifiers_features[clser]:
        feat.prune(remove_features)

    # count the features again and report the result
    features_counts = defaultdict(int)
    for feat in self.classifiers_features[clser]:
        for f in feat:
            features_counts[f] += 1

    self.classifiers_features_list[clser] = features_counts.keys()

    self.classifiers_features_mapping[clser] = {}
    for i, f in enumerate(self.classifiers_features_list[clser]):
        self.classifiers_features_mapping[clser][f] = i

    if verbose:
        print " Number of features after pruning: ", len(features_counts)
def invoke(self, dirname, filenames=set(), linter_configs=set()):
    """
    Main entrypoint for all plugins.

    Returns results in the format of:

    {'filename': {
      'line_number': [
        'error1',
        'error2'
        ]
      }
    }
    """
    retval = defaultdict(lambda: defaultdict(list))
    extensions = ' -o '.join(['-name "*%s"' % ext for ext in self.get_file_extensions()])

    cmd = 'find %s %s | xargs %s' % (
        dirname, extensions,
        self.get_command(dirname, linter_configs=linter_configs))
    result = self.executor(cmd)
    for line in result.split('\n'):
        output = self.process_line(dirname, line)
        if output is not None:
            filename, lineno, messages = output
            if filename.startswith(dirname):
                filename = filename[len(dirname) + 1:]
            retval[filename][lineno].append(messages)
    return retval
def _Symbolize(input):
    asan_libs = _FindASanLibraries()
    libraries = collections.defaultdict(list)
    asan_lines = []
    for asan_log_line in [a.strip() for a in input]:
        m = _ParseAsanLogLine(asan_log_line)
        if m:
            libraries[m['library']].append(m)
        asan_lines.append({'raw_log': asan_log_line, 'parsed': m})

    all_symbols = collections.defaultdict(dict)
    original_symbols_dir = symbol.SYMBOLS_DIR
    for library, items in libraries.iteritems():
        libname = _TranslateLibPath(library, asan_libs)
        lib_relative_addrs = set([i['rel_address'] for i in items])
        info_dict = symbol.SymbolInformationForSet(libname,
                                                   lib_relative_addrs,
                                                   True)
        if info_dict:
            all_symbols[library]['symbols'] = info_dict

    for asan_log_line in asan_lines:
        m = asan_log_line['parsed']
        if not m:
            print asan_log_line['raw_log']
            continue

        if (m['library'] in all_symbols and
                m['rel_address'] in all_symbols[m['library']]['symbols']):
            s = all_symbols[m['library']]['symbols'][m['rel_address']][0]
            print s[0], s[1], s[2]
        else:
            print asan_log_line['raw_log']
def reload(self, data):
    self.data = data
    self.data = [row for row in self.data if row["lang"] == self.lang]

    self.by_name = {}
    for row in self.data:
        self.by_name[row["name"]] = {
            "iso2": row["iso2"],
            "sub": row["sub"],
        }

    def ngram_dict():
        return {
            "name": defaultdict(float),
            "total": 0.0,
        }

    names = defaultdict(list)
    for row in self.data:
        iso2 = row["iso2"]
        name = row["name"]
        names[iso2].append(name)

    self.ngrams = defaultdict(ngram_dict)
    for iso2, name_list in list(names.items()):
        weight = 1.0 / len(name_list)
        for name in name_list:
            for ngram in text_to_ngrams(name, self.size):
                self.ngrams[ngram]["name"][name] += weight
                self.ngrams[ngram]["total"] += weight
def search_all(self, text):
    candidates = defaultdict(float)
    for ngram in text_to_ngrams(text, self.size):
        matches = self.ngrams.get(ngram, None)
        if not matches:
            continue
        total = matches["total"]
        for name, value in list(matches["name"].items()):
            candidates[name] += float(value) / total

    def score():
        return {
            "value": 0.0,
        }

    high_scores = defaultdict(score)
    for name, value in list(candidates.items()):
        row = self.by_name.get(name, None)
        key = row["iso2"]
        if row["sub"]:
            key += ">"
        if value > high_scores[key]["value"]:
            high_scores[key] = {
                "iso2": row["iso2"],
                "value": value,
                "name": name,
                "sub": row["sub"],
            }

    high_scores = sorted(high_scores.values(), key=lambda x: x["value"], reverse=True)
    return high_scores
def findSubstring(self, s, words):
    """
    :type s: str
    :type words: List[str]
    :rtype: List[int]
    """
    result, m, n, k = [], len(s), len(words), len(words[0])
    if m < n * k:
        return result

    lookup = collections.defaultdict(int)
    for i in words:
        lookup[i] += 1                         # Space: O(n * k)

    for i in xrange(m + 1 - k * n):            # Time: O(m)
        cur, j = collections.defaultdict(int), 0
        while j < n:                           # Time: O(n)
            word = s[i + j * k:i + j * k + k]  # Time: O(k)
            if word not in lookup:
                break
            cur[word] += 1
            if cur[word] > lookup[word]:
                break
            j += 1
        if j == n:
            result.append(i)

    return result
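A quick sanity check for the sliding-window scan above (not part of the original snippet, and assuming `collections` is imported as the method expects); the method is written for a LeetCode-style `Solution` class, so a throwaway wrapper is enough to exercise it:

class Solution(object):
    findSubstring = findSubstring  # reuse the function defined above

print(Solution().findSubstring("barfoothefoobarman", ["foo", "bar"]))  # -> [0, 9]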
def generate_te_doping(self, d):
    types = ['p', 'n']
    target = 'seebeck_doping'  # root key for getting all temps, etc

    pf_dict = defaultdict(lambda: defaultdict(int))
    zt_dict = defaultdict(lambda: defaultdict(int))
    for type in types:
        for t in d[target][type]:  # temperatures
            outside_pf_array = []
            outside_zt_array = []
            for didx, tensor in enumerate(d[target][type][t]):  # doping idx
                inside_pf_array = []
                inside_zt_array = []
                for tidx, val in enumerate(tensor):
                    seebeck = d['seebeck_doping'][type][t][didx][tidx]
                    cond = d['cond_doping'][type][t][didx][tidx]
                    kappa = d['kappa_doping'][type][t][didx][tidx]
                    inside_pf_array.append(seebeck * seebeck * cond)
                    inside_zt_array.append(seebeck * seebeck * cond * t / kappa)
                outside_pf_array.append(inside_pf_array)
                outside_zt_array.append(inside_zt_array)

            pf_dict[type][t] = outside_pf_array
            zt_dict[type][t] = outside_zt_array

    return pf_dict, zt_dict
def add2Tree(tree, id, name, floor, type):
    type = 'Type-' + type
    for i, btype in enumerate(tree['item']):
        if btype['id'] == type:
            ntype = btype
            break
    else:
        ntype = defaultdict()
        ntype['text'] = type
        ntype['id'] = type
        ntype['item'] = []
        tree['item'].append(ntype)

    floor = 'Floor-' + floor
    floorid = type + floor
    for i, bfloor in enumerate(ntype['item']):
        if bfloor['id'] == floorid:
            nfloor = bfloor
            break
    else:
        nfloor = defaultdict()
        nfloor['text'] = floor
        nfloor['id'] = floorid
        nfloor['item'] = []
        ntype['item'].append(nfloor)

    leaf = defaultdict()
    leaf['id'] = str(id) + '-' + name
    leaf['text'] = '.'.join(name.split('.')[3:])
    nfloor['item'].append(leaf)
def metadata_catalog(fits_filenames):
    "Histogram the metadata values in list of fits files."
    common, optional = metadata_field_use(fits_filenames)
    allfields = optional.union(common)
    histo = collections.defaultdict(int)
    values = collections.defaultdict(set)
    for fname in fits_filenames:
        hdulist = pyfits.open(fname)
        hdr = hdulist[0].header
        for field in allfields:
            if field in hdr:
                histo[field] += 1
                values[field].add(str(hdr[field]))
        hdulist.close()

    print('\n', '~' * 78)
    print('Histogram of field use:')
    pprint(histo)

    print('\n', '~' * 78)
    max_unique = 0.80
    print('Values used (max %s unique values):' % (max_unique))
    #! pprint(values)
    for k, v in values.items():
        if float(len(v)) / len(fits_filenames) > max_unique:
            continue
        print('%8s: %s' % (k, ', '.join(v)))
def to_dict(self, default=None):
    """
    Converts sequence of (Key, Value) pairs to a dictionary.

    >>> type(seq([('a', 1)]).to_dict())
    dict

    >>> seq([('a', 1), ('b', 2)]).to_dict()
    {'a': 1, 'b': 2}

    :param default: Can be a callable zero argument function. When not None, the returned
        dictionary is a collections.defaultdict with default as value for missing keys. If the
        value is not callable, then a zero argument lambda function is created returning the
        value and used for collections.defaultdict
    :return: dictionary from sequence of (Key, Value) elements
    """
    dictionary = {}
    for e in self.sequence:
        dictionary[e[0]] = e[1]
    if default is None:
        return dictionary
    else:
        if hasattr(default, '__call__'):
            return collections.defaultdict(default, dictionary)
        else:
            return collections.defaultdict(lambda: default, dictionary)
def __init__(self, ldg=None):
    DependencyGraph.__init__(self)
    self.nodes = defaultdict(lambda: {'address': None,
                                      'ldg': 0,
                                      'gid': 1,  # has the same value of the gid of nodes in ldg.
                                      'lemma': None,
                                      'head': None,
                                      'deps': defaultdict(int),
                                      'remaining_ops': defaultdict(list),  # list(LgGraph.operator_dic.keys()),
                                      'ctag': None,
                                      'tag': None,
                                      'feats': None,
                                      })
    self.git_list = [1]
    self.nodes[0].update(
        {'address': 0,
         'head': -1,
         'ldg': 'TOP',
         'gid': 1,  # has the same value of the gid of nodes in ldg.
         'remaining_ops': defaultdict(list),
         }
    )
    if isinstance(ldg, LgGraph):
        self.nodes[0]['ldg'] = ldg
    if isinstance(ldg, GraphNet):
        self.nodes = ldg
        self.git_list = ldg.get_git_list()
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets of dimension
    values at a time to determine whether two consecutive dimensions have
    a one-to-many relationship. If they do a mapping between the first and
    second dimension values is returned. Returns a list of n-1 mappings,
    between consecutive dimensions.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True
    dim_vals = list(zip(*keys))
    combinations = (zip(*dim_vals[i:i + 2]) for i in range(ndims - 1))
    hierarchies = []
    for combination in combinations:
        hierarchy = True
        store1 = defaultdict(list)
        store2 = defaultdict(list)
        for v1, v2 in combination:
            if v2 not in store2[v1]:
                store2[v1].append(v2)
            previous = store1[v2]
            if previous and previous[0] != v1:
                hierarchy = False
                break
            if v1 not in store1[v2]:
                store1[v2].append(v1)
        hierarchies.append(store2 if hierarchy else {})
    return hierarchies
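A small illustration of the return value (not from the source): in the first key set every second-dimension value belongs to exactly one first-dimension value, so the forward mapping is returned; in the second it does not, so the mapping is empty:

keys_ok = [('A', 1), ('A', 2), ('B', 3)]
keys_bad = [('A', 1), ('B', 1)]
print(hierarchical(keys_ok))   # [defaultdict(<class 'list'>, {'A': [1, 2], 'B': [3]})]
print(hierarchical(keys_bad))  # [{}]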
def __init__(self, names, messages):
    self.names = names
    self.messages = messages

    # do some preanalysis

    # MBU: user_id -> [message]
    self.messages_by_user = defaultdict(list)

    # who has liked {{user}}'s messages?
    # user_id -> (liker -> count)
    self.likes_per_user = defaultdict(lambda: defaultdict(int))

    # who has {{user}} liked?
    # user_id -> (liked -> count)
    self.user_likes = defaultdict(lambda: defaultdict(int))

    # which words are used most often?
    # word -> (user_id -> count)
    self.most_common_words = defaultdict(lambda: defaultdict(int))

    # per user, which words are used most often?
    # user_id -> (word -> count)
    self.mcw_per_user = defaultdict(lambda: defaultdict(int))

    # which users have liked their own posts?
    # user_id -> count
    self.self_likes = defaultdict(int)

    for message in messages:
        self.read_message(message)
def __init__(self, k, messages):
    self.k = k

    # user_id -> (phrase -> [next words])
    self.m = defaultdict(lambda: defaultdict(list))

    for message in messages:
        self.read_message(message)
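`read_message` is not shown in the source, so here is a hedged, self-contained sketch of how a phrase-to-next-words map shaped like `self.m` is typically filled and sampled for a k-th order Markov chain; all names below are illustrative:

import random
from collections import defaultdict

k = 2
m = defaultdict(lambda: defaultdict(list))
user_id, words = 'u1', 'the cat sat on the mat'.split()
for i in range(len(words) - k):
    phrase = tuple(words[i:i + k])           # k-word window is the chain state
    m[user_id][phrase].append(words[i + k])  # record the word that followed it
print(random.choice(m['u1'][('the', 'cat')]))  # -> 'sat'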
def __init__(self, analysis_files):
    # The analysis files we gather information from.
    self.analysis_files = analysis_files

    # Map from scala source files to the class files generated from that source
    self.products = defaultdict(set)

    # Map from scala sources to jar files they depend on. (And, rarely, class files.)
    self.binary_deps = defaultdict(set)

    # Map from scala sources to the source files providing the classes that they depend on
    # The set of source files here does *not* appear to include inheritance!
    # eg, in src/jvm/com/foursquare/api/util/BUILD:util,
    # in the source file ClientMetrics, class ClientView extends PrettyEnumeration, but
    # the file declaring PrettyEnumeration is *not* in the source deps.
    # But PrettyEnumeration *is* included in the list of classes in external_deps.
    self.source_deps = defaultdict(set)

    # Map from scala sources to the classes that they depend on.
    # (Not class files, source files, but just classes.)
    self.external_deps = defaultdict(set)

    # Map from scala sources to the classes that they provide.
    # (Again, not class files, fully-qualified class names.)
    self.class_names = defaultdict(set)

    for c in self.analysis_files:
        self.parse(c)
def show_connections():
    user = User.query.filter_by(id=session['user_id']).first()
    if 'linkedin_token' in session:
        conns = linkedin.get('people/~/connections:(headline,id,first-name,last-name,location,industry,picture-url)')
        f = open('data.json', 'w')
        f.write(json.dumps(conns.data, indent=1))
        f.close()
        connections = json.loads(json.dumps(conns.data, indent=1))
        # Get an id for a connection. We'll just pick the first one.
        print len(connections['values'])
        index = 0
        all = list()
        categorized = defaultdict(list)
        countdata = defaultdict()
        for conn in connections['values']:
            try:
                # all.append()
                name = conn['firstName'].encode("utf-8") + ' ' + conn['lastName'].encode("utf-8")
                industry = conn['industry'].encode("utf-8")
                headline = conn['headline'].encode("utf-8")
                contact = (name, industry, headline)
                # conn['firstName'].encode("utf-8"), conn['lastName'].encode("utf-8"),
                # conn['id'].encode("utf-8"), conn['picture-url'].encode("utf-8"),
                # conn['location'].encode("utf-8")
                all.append(contact)
                categorized[industry].append(contact)
            except KeyError:
                pass
            index = index + 1
        for key in categorized:
            if len(categorized[key]) * 1000 / index > 10:
                countdata[key] = len(categorized[key])
                print key, countdata[key]
    return render_template("connections.html", title='Connections', all_conn=all,
                           cat_conn=categorized, cat_count=countdata, user=user)
def edit_quantiles(self, q=.01, quantile_range=False, v=False, write=True):
    basic.log('creating edit quantiles %s' % self.lang)
    f_out = basic.create_dir('results/quantiles')
    df = pd.read_csv(self.db_path)
    df = self.drop_dups(df)
    df.page_id = df.page_id.astype(int)
    if self.drop1:
        df = df.loc[(df['len'] > 1)]
    q = np.arange(q, 1 + q, q)
    results = defaultdict(dict)
    for n in self.namespace:
        results[n] = defaultdict(dict)
        for r in self.revert:
            basic.log('%s %s %s' % (self.lang, n, r))
            if n == 'at':
                result = df[r].quantile(q=q)
                mean = df[r].mean()
            else:
                result = df.loc[(df['namespace'] == self.namespace.index(n)), r].quantile(q=q)
                # qcut = pd.qcut(df.loc[(df['namespace'] == self.namespace.index(n)), r], q)
                # print(qcut)
                mean = df.loc[(df['namespace'] == self.namespace.index(n)), r].mean()
            result = result.to_frame()
            column = '%s_%s_%s' % (self.lang, n, r)
            result.columns = [column]
            results[n][r] = {'quantiles': result, 'mean': mean}
            if write:
                result = result.append(DataFrame({column: result.loc[(result[column] < int(mean + 1))].tail(1).index.values}, index=['mean_quantile']))
                result = result.append(DataFrame({column: mean}, index=['mean_value']))
                result.to_csv('%s/%s_%s_%s.csv' % (f_out, self.lang, n, r), encoding='utf-8', index_label='quantiles')
    return results
def merge_token_in_report(sorted_token_id):
    token_id_freq = []
    for token, id in sorted_token_id:
        if token_id_freq:
            prev_tok, prev_id, prev_freq = token_id_freq[-1]
            if prev_tok == token and prev_id == id:
                token_id_freq[-1] = (token, id, prev_freq + 1)
            else:
                token_id_freq.append((token, id, 1))
        else:
            token_id_freq.append((token, id, 1))
    return token_id_freq

token_id_freq = merge_token_in_report(sorted_token_id)

dictionary = defaultdict(lambda: (0, 0))
postings = defaultdict(lambda: [])

# fill in dictionary
for token, id, freq in token_id_freq:
    dictionary[token] = (dictionary[token][0] + 1, dictionary[token][1] + freq)

# fill in postings
for token, id, freq in token_id_freq:
    postings[token].append((id, freq))

# Sort the postings
for key, values in postings.items():
    postings[key] = sorted(values, key=itemgetter(0))
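A tiny walkthrough with made-up tokens (not from the source) of what the merge step produces; `dictionary` then ends up mapping each token to (document frequency, total frequency) and `postings` to its sorted (doc_id, freq) pairs:

demo = [("apple", 1), ("apple", 1), ("apple", 2), ("banana", 1)]
print(merge_token_in_report(demo))
# -> [('apple', 1, 2), ('apple', 2, 1), ('banana', 1, 1)]
# feeding that through the loops above gives:
#   dictionary -> {'apple': (2, 3), 'banana': (1, 1)}
#   postings   -> {'apple': [(1, 2), (2, 1)], 'banana': [(1, 1)]}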
def __init__(self, width, height):
    self.width = width
    self.height = height
    self.canvas = defaultdict(lambda: defaultdict(lambda: '#'))
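The nested defaultdict gives every cell a background character without pre-allocating the grid; a minimal standalone demonstration (not from the source):

from collections import defaultdict

canvas = defaultdict(lambda: defaultdict(lambda: '#'))
canvas[0][1] = '*'   # draw one cell
print(canvas[0][1])  # '*'
print(canvas[5][9])  # '#' -- unset cells fall back to the background character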
class EthernetModel(object):
    frame_queues = defaultdict(deque)
    calc_crc = True
    rx_frame_isr = None
    rx_isr_enabled = False
    frame_times = defaultdict(deque)  # Used to record reception time

    @classmethod
    def enable_rx_isr(cls, interface_id):
        cls.rx_isr_enabled = True
        if cls.frame_queues[interface_id] and cls.rx_frame_isr is not None:
            Interrupts.trigger_interrupt(cls.rx_frame_isr, 'Ethernet_RX_Frame')

    @classmethod
    def disable_rx_isr(cls, interface_id):
        EthernetModel.rx_isr_enabled = False

    @classmethod
    @peripheral_server.tx_msg
    def tx_frame(cls, interface_id, frame):
        '''
        Creates the message that Peripheral.tx_msga will send on this event
        '''
        print("Sending Frame (%i): " % len(frame), binascii.hexlify(frame))
        msg = {'interface_id': interface_id, 'frame': frame}
        return msg

    @classmethod
    @peripheral_server.reg_rx_handler
    def rx_frame(cls, msg):
        '''
        Processes reception of this type of message from PeripheralServer.rx_msg
        '''
        interface_id = msg['interface_id']
        log.info("Adding Frame to: %s" % interface_id)
        frame = msg['frame']
        cls.frame_queues[interface_id].append(frame)
        cls.frame_times[interface_id].append(time.time())
        if cls.rx_frame_isr is not None and cls.rx_isr_enabled:
            Interrupts.trigger_interrupt(cls.rx_frame_isr, 'Ethernet_RX_Frame')

    @classmethod
    def get_rx_frame(cls, interface_id, get_time=False):
        frame = None
        rx_time = None
        log.info("Checking for: %s" % str(interface_id))
        if cls.frame_queues[interface_id]:
            log.info("Returning frame")
            frame = cls.frame_queues[interface_id].popleft()
            rx_time = cls.frame_times[interface_id].popleft()
        if get_time:
            return frame, rx_time
        else:
            return frame

    @classmethod
    def get_frame_info(cls, interface_id):
        '''
        return number of frames and length of first frame
        '''
        queue = cls.frame_queues[interface_id]
        if queue:
            return len(queue), len(queue[0])
        return 0, 0
from collections import defaultdict

N = int(input().strip())
W = sorted([int(x) for x in input().strip().split()])

toys = defaultdict(lambda: [])
price = 1
border_weight = W[0] + 4
for w in W:
    if border_weight < w:
        border_weight = w + 4
    toys[border_weight].append(w)

print(len(toys))
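A concrete trace of the grouping rule (a sketch, not part of the original script): each group spans at most 4 weight units above its lightest toy:

from collections import defaultdict

def count_groups(weights):
    weights = sorted(weights)
    toys = defaultdict(list)
    border = weights[0] + 4
    for w in weights:
        if border < w:      # w no longer fits the current group
            border = w + 4  # start a new group anchored at w
        toys[border].append(w)
    return len(toys)

print(count_groups([1, 2, 3, 6, 7, 20]))  # 3 groups: {1, 2, 3}, {6, 7}, {20}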
for word in words:
    uword = word.lower().replace('ё', 'е')
    if uword not in vocabulary and not is_int(word) and word not in '. ? ! : - , — – ) ( " \' « » „ “ ; …'.split():
        if uword not in oov_tokens:
            wrt.write('Sample with oov-word "{}":\n'.format(word))
            wrt.write('Question: {}\n'.format(sample.question))
            wrt.write('Short answer: {}\n'.format(sample.short_answer))
            wrt.write('Expanded answer: {}\n'.format(sample.expanded_answer))
            wrt.write('\n\n')
            oov_tokens.add(uword)
        break

# Build morphological templates for the knn-1 interpretation model
print('Building knn1 templates from {} samples...'.format(len(samples2)))
templates2 = collections.Counter()
packed2samples = collections.defaultdict(list)
for sample in samples2:
    # DEBUG START
    # if 'зовут' not in sample.left or sample.short_phrase.lower() != 'илья':
    #     continue
    # DEBUG END
    context = [s.strip() for s in sample.left.split('|')] + [sample.short_phrase]
    expanded_tokens = lemmatizer.lemmatize(tagger.tag(tokenizer.tokenize(sample.expanded_phrase)))

    context_templates = [create_context_template(iline, line_str, expanded_tokens)
                         for iline, line_str in enumerate(context)]
    if any((z is None) for z in context_templates):
        continue

    expanded_template = create_expanded_template(context_templates, expanded_tokens)

    # drop the lemmas from the context, as they were only needed for
def clear(self):
    """
    Clears the record batch builder.
    """
    self.batch_ = defaultdict(list)
    self.batch_sizes_ = {}
def isEscapePossible(self, blocked: List[List[int]], source: List[int], target: List[int]) -> bool:
    blocked_rows = defaultdict(list)
    blocked_cols = defaultdict(list)  # was `bloked_cols`, a typo
    for row, col in sorted(blocked):
        blocked_rows[col].append(row)
        blocked_cols[row].append(col)

    target_col = target[1]
    target_row = target[0]

    visited = set()

    # directions
    RIGHT = 0
    DOWN = 1
    LEFT = 2
    TOP = 3

    def dfs(row, col):
        if row == target_row and col == target_col:
            return True

        for direction in RIGHT, DOWN, LEFT, TOP:
            next_row = row
            next_col = col
            if direction == RIGHT:
                i = bisect_right(blocked_cols[row], col)
                next_blocked_col = float('inf')
                if i < len(blocked_cols[row]):
                    next_blocked_col = blocked_cols[row][i]
                next_col = min(next_blocked_col - 1, target_col)
            elif direction == LEFT:
                i = bisect_right(blocked_cols[row], col) - 1
                next_blocked_col = -1
                if i >= 0:
                    next_blocked_col = blocked_cols[row][i]
                next_col = min(next_blocked_col + 1, target_col)
            elif direction == DOWN:
                i = bisect_right(blocked_rows[col], row)
                next_blocked_row = float('inf')
                if i < len(blocked_rows[col]):
                    next_blocked_row = blocked_rows[col][i]
                next_row = min(next_blocked_row - 1, target_row)
            elif direction == TOP:
                i = bisect_right(blocked_rows[col], row) - 1
                next_blocked_row = -1
                if i >= 0:
                    next_blocked_row = blocked_rows[col][i]
                next_row = min(next_blocked_row + 1, target_row)

            key = (next_row, next_col)
            if key in visited:
                continue
            visited.add(key)
            if dfs(next_row, next_col):
                return True
        return False

    return dfs(source[0], source[1])
import re
import csv  # needed for csv.reader below; missing from the original imports
import argparse
from os.path import join
from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument(
    "meta",
    help="Required. the FULL path to the tab delimited meta file containing run info")
args = parser.parse_args()

assert args.meta is not None, "please provide the path to the meta file"

FILES = defaultdict(lambda: defaultdict(list))

with open(args.meta, "r") as f:
    reader = csv.reader(f, delimiter="\t")
    # skip the header
    header = next(reader)
    for row in reader:
        run_id = row[0].strip()
        flow_cell = row[1].strip()
        sample = row[2].strip()
        # This is name for the fastq folder
        batch_name = row[3].strip()
        csv_file = row[4].strip()
        ## now just assume the file name in the metafile contained in the fastq file path
        FILES[sample][batch_name].append(run_id)
        FILES[sample][batch_name].append(flow_cell)
def __init__(self, _io, _parent=None, _root=None):
    self._io = _io
    self._parent = _parent
    self._root = _root if _root else self
    self._debug = collections.defaultdict(dict)
'''i/p: ['ashok', 'hari', 'bhanu', 'anil', 'bharath', 'anvesh', 'uday', 'raja']
o/p: {'a': ['ashok', 'anil', 'anvesh'], 'b': ['bhanu', 'bharath'], 'h': ['hari'],
      'u': ['uday'], 'r': ['raja']}'''

val = ['ashok', 'hari', 'bhanu', 'anil', 'bharath', 'anvesh', 'uday', 'raja']

d = {}
for i in val:  # 'ashok'
    if i[0] not in d:
        d[i[0]] = []   # {'a': [], 'h': []}
    d[i[0]].append(i)  # {'a': ['ashok'], 'h': ['hari']}
print(d)

from collections import defaultdict

df = defaultdict(set)
print(df)
for i in val:
    df[i[0]].add(i)
print(df)

a = [3, 4, 5, 6, 4, 6, 6, 4, 5]
'''o/p: {3: 1, 4: 3, 5: 2, 6: 3}'''
di = {}  # di = defaultdict(int)
for i in a:
    # 0 + 1
    # d[i] += 1  # d[i] = d[i] + 1
    di[i] = a.count(i)
def get_process_state(self, name, pids, try_sudo):
    st = defaultdict(list)

    # Remove from cache the processes that are not in `pids`
    cached_pids = set(self.process_cache[name].keys())
    pids_to_remove = cached_pids - pids
    for pid in pids_to_remove:
        del self.process_cache[name][pid]

    for pid in pids:
        st['pids'].append(pid)

        new_process = False
        # If the pid's process is not cached, retrieve it
        if pid not in self.process_cache[name] or not self.process_cache[name][pid].is_running():
            new_process = True
            try:
                self.process_cache[name][pid] = psutil.Process(pid)
                self.log.debug('New process in cache: {}'.format(pid))
            # Skip processes dead in the meantime
            except psutil.NoSuchProcess:
                self.warning('Process {} disappeared while scanning'.format(pid))
                # reset the PID cache now, something changed
                self.last_pid_cache_ts[name] = 0
                continue

        p = self.process_cache[name][pid]

        meminfo = self.psutil_wrapper(p, 'memory_info', ['rss', 'vms'], try_sudo)
        st['rss'].append(meminfo.get('rss'))
        st['vms'].append(meminfo.get('vms'))

        mem_percent = self.psutil_wrapper(p, 'memory_percent', None, try_sudo)
        st['mem_pct'].append(mem_percent)

        # will fail on win32 and solaris
        shared_mem = self.psutil_wrapper(p, 'memory_info', ['shared'], try_sudo).get('shared')
        if shared_mem is not None and meminfo.get('rss') is not None:
            st['real'].append(meminfo['rss'] - shared_mem)
        else:
            st['real'].append(None)

        ctxinfo = self.psutil_wrapper(p, 'num_ctx_switches', ['voluntary', 'involuntary'], try_sudo)
        st['ctx_swtch_vol'].append(ctxinfo.get('voluntary'))
        st['ctx_swtch_invol'].append(ctxinfo.get('involuntary'))

        st['thr'].append(self.psutil_wrapper(p, 'num_threads', None, try_sudo))

        cpu_percent = self.psutil_wrapper(p, 'cpu_percent', None, try_sudo)
        cpu_count = psutil.cpu_count()
        if not new_process:
            # psutil returns `0.` for `cpu_percent` the first time it's sampled
            # on a process, so save the value only on non-new processes
            st['cpu'].append(cpu_percent)
            if cpu_count > 0 and cpu_percent is not None:
                st['cpu_norm'].append(cpu_percent / cpu_count)
            else:
                self.log.debug('could not calculate the normalized cpu pct, cpu_count: {}'.format(cpu_count))

        st['open_fd'].append(self.psutil_wrapper(p, 'num_fds', None, try_sudo))
        st['open_handle'].append(self.psutil_wrapper(p, 'num_handles', None, try_sudo))

        ioinfo = self.psutil_wrapper(p, 'io_counters',
                                     ['read_count', 'write_count', 'read_bytes', 'write_bytes'],
                                     try_sudo)
        st['r_count'].append(ioinfo.get('read_count'))
        st['w_count'].append(ioinfo.get('write_count'))
        st['r_bytes'].append(ioinfo.get('read_bytes'))
        st['w_bytes'].append(ioinfo.get('write_bytes'))

        pagefault_stats = self.get_pagefault_stats(pid)
        if pagefault_stats is not None:
            (minflt, cminflt, majflt, cmajflt) = pagefault_stats
            st['minflt'].append(minflt)
            st['cminflt'].append(cminflt)
            st['majflt'].append(majflt)
            st['cmajflt'].append(cmajflt)
        else:
            st['minflt'].append(None)
            st['cminflt'].append(None)
            st['majflt'].append(None)
            st['cmajflt'].append(None)

        # calculate process run time
        create_time = self.psutil_wrapper(p, 'create_time', None, try_sudo)
        if create_time is not None:
            now = time.time()
            run_time = now - create_time
            st['run_time'].append(run_time)

    return st
import xml.etree.ElementTree as Tree
from collections import defaultdict  # needed for `tabdict` below; missing from the original imports

lyp_attr = ['layer', 'datatype', 'source', 'fill-color', 'frame-color',
            'frame-brightness', 'fill-brightness', 'dither-pattern', 'valid',
            'visible', 'transparent', 'width', 'marked', 'animation']
nazca_attr = ['layer', 'datatype', 'name', 'fill_color', 'frame_color',
              'frame_brightness', 'fill_brightness', 'dither_pattern', 'valid',
              'visible', 'transparent', 'width', 'marked', 'animation']
doPrint = False

#==============================================================================
# lyp2csv
#==============================================================================
tabdict = defaultdict(list)
depth = 0

def __parse_properties(lev1, infolevel=0):
    """Parse lyp tags <properties> and <group-member> levels."""
    global tabdict
    global depth

    depth += 1
    tabdict['depth'].append(depth)
    for lev2 in lev1:
        tag = lev2.tag
        value = lev1.find(tag).text
        if infolevel > 2:
            if tag == 'group-members':  # remove linefeed
                value = ''
            print("{}{}: {}".format(' ' * depth, tag, value))
def __init__(self, config: FrigateConfig, client, topic_prefix, tracked_objects_queue,
             event_queue, event_processed_queue, stop_event):
    threading.Thread.__init__(self)
    self.name = "detected_frames_processor"
    self.config = config
    self.client = client
    self.topic_prefix = topic_prefix
    self.tracked_objects_queue = tracked_objects_queue
    self.event_queue = event_queue
    self.event_processed_queue = event_processed_queue
    self.stop_event = stop_event
    self.camera_states: Dict[str, CameraState] = {}
    self.frame_manager = SharedMemoryFrameManager()

    def start(camera, obj: TrackedObject, current_frame_time):
        self.event_queue.put(('start', camera, obj.to_dict()))

    def update(camera, obj: TrackedObject, current_frame_time):
        after = obj.to_dict()
        message = {'before': obj.previous, 'after': after,
                   'type': 'new' if obj.previous['false_positive'] else 'update'}
        self.client.publish(f"{self.topic_prefix}/events", json.dumps(message), retain=False)
        obj.previous = after

    def end(camera, obj: TrackedObject, current_frame_time):
        snapshot_config = self.config.cameras[camera].snapshots
        event_data = obj.to_dict(include_thumbnail=True)
        event_data['has_snapshot'] = False
        if not obj.false_positive:
            message = {'before': obj.previous, 'after': obj.to_dict(), 'type': 'end'}
            self.client.publish(f"{self.topic_prefix}/events", json.dumps(message), retain=False)
            # write snapshot to disk if enabled
            if snapshot_config.enabled:
                jpg_bytes = obj.get_jpg_bytes(
                    timestamp=snapshot_config.timestamp,
                    bounding_box=snapshot_config.bounding_box,
                    crop=snapshot_config.crop,
                    height=snapshot_config.height)
                with open(os.path.join(CLIPS_DIR, f"{camera}-{obj.obj_data['id']}.jpg"), 'wb') as j:
                    j.write(jpg_bytes)
                event_data['has_snapshot'] = True
        self.event_queue.put(('end', camera, event_data))

    def snapshot(camera, obj: TrackedObject, current_frame_time):
        mqtt_config = self.config.cameras[camera].mqtt
        if mqtt_config.enabled:
            jpg_bytes = obj.get_jpg_bytes(
                timestamp=mqtt_config.timestamp,
                bounding_box=mqtt_config.bounding_box,
                crop=mqtt_config.crop,
                height=mqtt_config.height)
            self.client.publish(
                f"{self.topic_prefix}/{camera}/{obj.obj_data['label']}/snapshot",
                jpg_bytes, retain=True)

    def object_status(camera, object_name, status):
        self.client.publish(f"{self.topic_prefix}/{camera}/{object_name}", status, retain=False)

    for camera in self.config.cameras.keys():
        camera_state = CameraState(camera, self.config, self.frame_manager)
        camera_state.on('start', start)
        camera_state.on('update', update)
        camera_state.on('end', end)
        camera_state.on('snapshot', snapshot)
        camera_state.on('object_status', object_status)
        self.camera_states[camera] = camera_state

    # {
    #   'zone_name': {
    #       'person': {
    #           'camera_1': 2,
    #           'camera_2': 1
    #       }
    #   }
    # }
    self.zone_data = defaultdict(lambda: defaultdict(lambda: {}))
def __init__(self, *args, **kwargs):
    super(ovs_ctl, self).__init__(*args, **kwargs)
    self.mac_to_port = {}
    self.switches = {}
    self.dpid_to_name = {
        # Original environment -- uncomment the 3 lines below
        # 95536754289: 'h00',
        # 95535344413: 'h01',
        # 95542363502: 'h02'

        # ORCA first demo -- uncomment the 3 lines below
        # 95534111059: 'h00',
        # 95538556217: 'h01',
        # 95533205304: 'h02'

        # Virtual Machine SONAr -- uncomment the 5 lines below
        # 95532435104: 's01',
        # 95533179799: 's02',
        # 95532162947: 's03',
        # 95539282496: 's04',
        # 95533558180: 's05'

        # ORCA second demo -- uncomment the 5 lines below:
        # 95532594594: 's01',
        # 95534454058: 's02',
        # 95536781980: 's03',
        # 95531791552: 's04',
        # 47102661227: 's05'
        # 95532050795: 's05'

        # ORCA Final Demo
        int('000000163ea46de1', 16): 's01',
        int('000000163e1d4d1f', 16): 's02',
        int('0000000af789926b', 16): 's03',
        int('000000163e784ab7', 16): 's04'
    }

    self.topology = defaultdict(dict)
    self.topology['s01']['s02'] = 1
    self.topology['s01']['s04'] = 2
    self.topology['s02']['s03'] = 1
    self.topology['s02']['s01'] = 2
    self.topology['s03']['s04'] = 1
    self.topology['s03']['s02'] = 2
    self.topology['s04']['s01'] = 1
    self.topology['s04']['s03'] = 2

    self.speed = defaultdict(dict)
    self.speed['s01']['s02'] = 1000
    self.speed['s01']['s04'] = 1000
    self.speed['s02']['s03'] = 1000
    self.speed['s02']['s01'] = 1000
    self.speed['s03']['s04'] = 1000
    self.speed['s03']['s02'] = 1000
    self.speed['s04']['s01'] = 1000
    self.speed['s04']['s03'] = 1000

    self.ports = {}
    self.arp_disabled_ports = self.ports_to_disable()
    self.control = {}
    self.waiters = {}

    # Instantiate the OVS SDR Controller
    self.ovs_controller_thread = ovs_controller(
        name='OVS',
        req_header='ovs_req',  # Don't modify
        rep_header='ovs_rep',  # Don't modify
        create_msg='ovc_crs',
        request_msg='ovc_rrs',
        update_msg='ovc_urs',
        delete_msg='ovc_drs',
        topology_msg='ovc_trs',
        host=kwargs.get('host', '0.0.0.0'),
        port=kwargs.get('port', 3200),
        ovs=self
    )
    # Start the OVS SDR Controller Server
    self.ovs_controller_hub = hub.spawn(self.ovs_controller_thread.run)

    self.count = len(self.topology)
    self.switch_config_count = {}
    self.single = {}
    self.st = time.time()
def __init__(self, items=None):
    self._collection = defaultdict(list)
    self._add_items_impl(self._collection, take_with_default(items, []))
def scrape(self, chamber, term):
    urls = {'lower': "http://www.msa.md.gov/msa/mdmanual/06hse/html/hseal.html",
            'upper': "http://www.msa.md.gov/msa/mdmanual/05sen/html/senal.html"}

    detail_re = re.compile('\((R|D)\), (?:Senate President, )?(?:House Speaker, )?District (\w+)')

    with self.urlopen(urls[chamber]) as html:
        doc = lxml.html.fromstring(html)

        # rest of data on this page is <li>s that have anchor tags
        for a in doc.cssselect('li a'):
            link = a.get('href')
            # tags don't close so we get the <li> and <a> content and diff them
            name_text = a.text_content()
            detail_text = a.getparent().text_content().replace(name_text, '')

            # ignore if it is not a valid link
            if link:
                # handle names
                names = name_text.split(',')
                last_name = names[0]
                first_name = names[1].strip()
                # TODO: try to trim first name to remove middle initial
                if len(names) > 2:
                    suffixes = names[2]
                else:
                    suffixes = ''

                # handle details
                details = detail_text.strip()
                party, district = detail_re.match(details).groups()
                party = PARTY_DICT[party]

                leg_url = BASE_URL + link

                leg = Legislator(term, chamber, district,
                                 ' '.join((first_name, last_name)),
                                 first_name, last_name,
                                 party=party, suffixes=suffixes,
                                 url=leg_url)
                leg.add_source(url=leg_url)

                with self.urlopen(leg_url) as leg_html:
                    leg_doc = lxml.html.fromstring(leg_html)
                    img_src = leg_doc.xpath('//img[@align="left"]/@src')
                    if img_src:
                        leg['photo_url'] = BASE_URL + img_src[0]

                    # address extraction
                    # this is pretty terrible, we get address in a format that looks
                    # like:
                    #   James Senate Office Building, Room 322
                    #   11 Bladen St., Annapolis, MD 21401
                    #   (410) 841-3565, (301) 858-3565; 1-800-492-7122, ext. 3565 (toll free)
                    #   e-mail: [email protected]
                    #   fax: (410) 841-3552, (301) 858-3552
                    #
                    #   Western Maryland Railway Station, 13 Canal St., Room 304, Cumberland, MD 21502
                    #   (301) 722-4780; 1-866-430-9553 (toll free)
                    #   e-mail: [email protected]
                    #   fax: (301) 722-4790
                    # usually first ul, sometimes first p
                    try:
                        addr_lines = leg_doc.xpath('//ul')[0].text_content().strip().splitlines()
                    except IndexError:
                        addr_lines = leg_doc.xpath('//p')[0].text_content().strip().splitlines()

                    addr_pieces = {'capitol': defaultdict(str),
                                   'district': defaultdict(str)}
                    addr_type = 'capitol'
                    for line in addr_lines:
                        # '(410)'/'(301)' are Maryland area codes (original had '(401)', apparently a typo)
                        if '(410)' in line or '(301)' in line:
                            addr_pieces[addr_type]['phone'] = line
                        elif 'toll free' in line:
                            pass  # skip stand alone 1-800 numbers
                        elif 'e-mail' in line:
                            addr_pieces[addr_type]['email'] = line.replace('email: ', '')
                        elif 'fax' in line:
                            addr_pieces[addr_type]['fax'] = line.replace('fax: ', '')
                        elif line == '':
                            addr_type = 'district'
                        else:
                            addr_pieces[addr_type]['address'] += '{0}\n'.format(line)

                    if addr_pieces['capitol']:
                        leg.add_office('capitol', 'Capitol Office',
                                       **addr_pieces['capitol'])
                        leg['email'] = (addr_pieces['capitol']['email'] or
                                        addr_pieces['district']['email'] or
                                        None)
                    if addr_pieces['district']:
                        leg.add_office('district', 'District Office',
                                       **addr_pieces['district'])

                self.save_legislator(leg)
# _*_ coding :utf-8 _*_
__author__ = 'du'
__blog__ = 'www.cnblogs.com/anmutu;www.zmfei4.com;'
__date__ = '2020/1/6 2:01'
from collections import defaultdict
from datetime import datetime

# The most efficient approach is actually this memoized search.
total = defaultdict(int)
total_memory = defaultdict(int)


def fib_recursion(k):
    assert k > 0, "k must be greater than 0"
    if k in [1, 2]:
        return 1
    else:
        global total
        total[k] += 1
        return fib_recursion(k - 2) + fib_recursion(k - 1)


# Store values in total_memory: before recursing, check whether k is already
# there; if it is, reuse the stored value, otherwise compute and store it.
# (The source is cut off after the membership test; the remaining lines follow
# the pattern described in the comment above.)
def fib_recursion_memory(k):
    assert k > 0, "k must be greater than 0"
    if k in [1, 2]:
        return 1
    global total_memory
    if k in total_memory:
        return total_memory[k]
    total_memory[k] = fib_recursion_memory(k - 2) + fib_recursion_memory(k - 1)
    return total_memory[k]
def __init__(self, a, b, ignore_keywords=[], ignore_comments=[], tolerance=0.0,
             ignore_blanks=True, ignore_blank_cards=True):
    """
    See `FITSDiff` for explanations of the initialization parameters.
    """
    self.ignore_keywords = set(k.upper() for k in ignore_keywords)
    self.ignore_comments = set(k.upper() for k in ignore_comments)
    self.tolerance = tolerance
    self.ignore_blanks = ignore_blanks
    self.ignore_blank_cards = ignore_blank_cards

    self.ignore_keyword_patterns = set()
    self.ignore_comment_patterns = set()
    for keyword in list(self.ignore_keywords):
        keyword = keyword.upper()
        if keyword != '*' and glob.has_magic(keyword):
            self.ignore_keywords.remove(keyword)
            self.ignore_keyword_patterns.add(keyword)
    for keyword in list(self.ignore_comments):
        keyword = keyword.upper()
        if keyword != '*' and glob.has_magic(keyword):
            self.ignore_comments.remove(keyword)
            self.ignore_comment_patterns.add(keyword)

    # Keywords appearing in each header
    self.common_keywords = []

    # Set to the number of keywords in each header if the counts differ
    self.diff_keyword_count = ()

    # Set if the keywords common to each header (excluding ignore_keywords)
    # appear in different positions within the header
    # TODO: Implement this
    self.diff_keyword_positions = ()

    # Keywords unique to each header (excluding keywords in ignore_keywords)
    self.diff_keywords = ()

    # Keywords that have different numbers of duplicates in each header
    # (excluding keywords in ignore_keywords)
    self.diff_duplicate_keywords = {}

    # Keywords common to each header but having different values (excluding
    # keywords in ignore_keywords)
    self.diff_keyword_values = defaultdict(lambda: [])

    # Keywords common to each header but having different comments
    # (excluding keywords in ignore_keywords or in ignore_comments)
    self.diff_keyword_comments = defaultdict(lambda: [])

    if isinstance(a, string_types):
        a = Header.fromstring(a)
    if isinstance(b, string_types):
        b = Header.fromstring(b)

    if not (isinstance(a, Header) and isinstance(b, Header)):
        raise TypeError('HeaderDiff can only diff astropy.io.fits.Header '
                        'objects or strings containing FITS headers.')

    super(HeaderDiff, self).__init__(a, b)
def extract(self, progress=True, recurse=True, tex_only=False,
            extract_manifest=False, path=None, rename_dupes=False, **kwargs):
    if path is None:
        path = self.folder_out
    if tex_only:
        kwargs['use_cache'] = False
    utils.mkdir_silent(path)
    utime(path, (self.time, self.time))

    if extract_manifest and self.manifest and not tex_only:
        with open(join(path, 'ifs_manifest.xml'), 'wb') as f:
            f.write(self.manifest.to_text().encode('utf8'))

    # build the tree
    for folder in self.tree.all_folders:
        if tex_only and folder.name == 'tex':
            self.tree = folder  # make it root to discourage repacking
            folder.name = ''
            for f in folder.all_files:
                f.path = ''
            break
        elif tex_only:
            continue
        f_path = join(path, folder.full_path)
        utils.mkdir_silent(f_path)
        utime(f_path, (self.time, self.time))

        # handle different-case-but-same-name for Windows
        same_name = defaultdict(list)
        for name, obj in folder.files.items():
            same_name[name.lower()].append(obj)

        for files in same_name.values():
            # common base case of "sane ifs file"
            if len(files) == 1:
                continue

            # make them 'a (1)', 'a (2)' etc
            if rename_dupes:
                for i, f in enumerate(files[1:]):
                    base, ext = splitext(f.name)
                    f.name = base + ' ({})'.format(i + 1) + ext
            elif progress:  # warn if not silenced
                all_names = ', '.join([f.name for f in files])
                tqdm.write(
                    'WARNING: Files with same name and differing case will overwrite on Windows ({}). '
                    .format(all_names) +
                    'Use --rename-dupes to extract without loss')
            # else just do nothing

    # extract the files
    for f in tqdm(self.tree.all_files, disable=not progress):
        # allow recurse + tex_only to extract ifs files
        if tex_only and not isinstance(f, ImageFile) and not isinstance(f, ImageCanvas) \
                and not (recurse and f.name.endswith('.ifs')):
            continue
        f.extract(path, **kwargs)
        if progress:
            tqdm.write(f.full_path)
        if recurse and f.name.endswith('.ifs'):
            rpath = join(path, f.full_path)
            i = IFS(rpath)
            i.extract(progress=progress, recurse=recurse, tex_only=tex_only,
                      extract_manifest=extract_manifest,
                      path=rpath.replace('.ifs', '_ifs'),
                      rename_dupes=rename_dupes, **kwargs)

    # you can't pickle open files, so this won't work. Perhaps there is a way around it?
    '''to_extract = (f for f in self.tree.all_files
                     if not(tex_only and not isinstance(f, ImageFile)
                            and not isinstance(f, ImageCanvas)))
def _get_empty_index():
    return defaultdict(set)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019-07-10 16:02
# @Author  : bingo
# @Site    : 
# @File    : 4.py
# @Software: PyCharm
"""
For any plain-text English file, count how many times each word occurs.
"""
from collections import defaultdict
import re

d = defaultdict(int)
# regular expression that matches a word
PATTERN = re.compile('[a-zA-Z]+')


def get_word_from_file(file: str) -> 'generator':
    with open(file, 'r') as f:
        for line in f:
            line = line.strip()
            results = PATTERN.finditer(line)
            for word in results:
                yield word.group().lower()


for word in get_word_from_file('4_articles.txt'):
    d[word] += 1
def __init__(self, vertices):
    self.V = vertices
    self.graph = defaultdict(list)
    self.degree = [0] * vertices
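Only `__init__` appears in the source; a hypothetical companion `add_edge` (the class name and the method are illustrative, not the author's) shows how `graph` and `degree` would normally be kept in sync:

from collections import defaultdict

class Graph:
    def __init__(self, vertices):
        self.V = vertices
        self.graph = defaultdict(list)
        self.degree = [0] * vertices

    def add_edge(self, u, v):
        self.graph[u].append(v)  # defaultdict(list) creates the list on first use
        self.graph[v].append(u)  # undirected: mirror the edge
        self.degree[u] += 1
        self.degree[v] += 1

g = Graph(3)
g.add_edge(0, 1)
g.add_edge(1, 2)
print(g.degree)  # [1, 2, 1]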
def tree():
    return defaultdict(tree)
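The classic use of this one-liner (a usage sketch, not from the source): every missing key materialises another tree, so arbitrarily deep paths can be assigned without building the intermediate dicts by hand:

config = tree()
config['server']['http']['port'] = 8080  # intermediate dicts appear automatically
print(config['server']['http']['port'])  # 8080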
def get_data(input_filename, start_date=None, end_date=None):
    casos = read_cases(input_filename, order_by="date")
    dates = sorted(set(c.date for c in casos))
    start_date = start_date or dates[0]
    end_date = end_date or dates[-1]
    caso_by_key = defaultdict(list)
    for caso in casos:
        caso_by_key[row_key(caso)].append(caso)
    for place_cases in caso_by_key.values():
        place_cases.sort(key=lambda row: row.date, reverse=True)

    order_key = attrgetter("order_for_place")
    last_case_for_place = {}
    order_for_place = Counter()
    for date in date_range(start_date, end_date + datetime.timedelta(days=1), "daily"):
        for place_key in demographics.place_keys():
            place_type, state, city = place_key
            place_cases = caso_by_key[place_key]
            valid_place_cases = sorted(
                [item for item in place_cases if item.date <= date],
                key=order_key,
                reverse=True,
            )
            if not valid_place_cases:
                # There are no cases for this city for this date - skip
                continue

            # This place has at least one case for this date (or before),
            # so use the newest one.
            last_valid_case = valid_place_cases[0]
            newest_case = place_cases[0]
            is_last = date == last_valid_case.date == newest_case.date
            order_for_place[place_key] += 1

            new_case = {
                "city": city,
                "city_ibge_code": last_valid_case.city_ibge_code,
                "date": date,
                "epidemiological_week": epidemiological_week(date),
                "estimated_population": last_valid_case.estimated_population,
                "estimated_population_2019": last_valid_case.estimated_population_2019,
                "is_last": is_last,
                "is_repeated": last_valid_case.date != date,
                "last_available_confirmed": last_valid_case.confirmed,
                "last_available_confirmed_per_100k_inhabitants": last_valid_case.confirmed_per_100k_inhabitants,
                "last_available_date": last_valid_case.date,
                "last_available_death_rate": last_valid_case.death_rate,
                "last_available_deaths": last_valid_case.deaths,
                "order_for_place": order_for_place[place_key],
                "place_type": place_type,
                "state": state,
            }

            last_case = last_case_for_place.get(place_key, None)
            if last_case is None:
                new_confirmed = new_case["last_available_confirmed"]
                new_deaths = new_case["last_available_deaths"]
            else:
                new_confirmed = new_case["last_available_confirmed"] - last_case["last_available_confirmed"]
                new_deaths = new_case["last_available_deaths"] - last_case["last_available_deaths"]
            new_case["new_confirmed"] = new_confirmed
            new_case["new_deaths"] = new_deaths
            last_case_for_place[place_key] = new_case

            yield new_case
def processAlgorithm(self, parameters, context, feedback):
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))

    value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
    category_field_names = self.parameterAsFields(
        parameters, self.CATEGORIES_FIELD_NAME, context)

    value_field_index = source.fields().lookupField(value_field_name)
    if value_field_index >= 0:
        value_field = source.fields().at(value_field_index)
    else:
        value_field = None
    category_field_indexes = [
        source.fields().lookupField(n) for n in category_field_names
    ]

    # generate output fields
    fields = QgsFields()
    for c in category_field_indexes:
        fields.append(source.fields().at(c))

    def addField(name):
        """
        Adds a field to the output, keeping the same data type as the value_field
        """
        field = QgsField(value_field)
        field.setName(name)
        fields.append(field)

    if value_field is None:
        field_type = 'none'
        fields.append(QgsField('count', QVariant.Int))
    elif value_field.isNumeric():
        field_type = 'numeric'
        fields.append(QgsField('count', QVariant.Int))
        fields.append(QgsField('unique', QVariant.Int))
        fields.append(QgsField('min', QVariant.Double))
        fields.append(QgsField('max', QVariant.Double))
        fields.append(QgsField('range', QVariant.Double))
        fields.append(QgsField('sum', QVariant.Double))
        fields.append(QgsField('mean', QVariant.Double))
        fields.append(QgsField('median', QVariant.Double))
        fields.append(QgsField('stddev', QVariant.Double))
        fields.append(QgsField('minority', QVariant.Double))
        fields.append(QgsField('majority', QVariant.Double))
        fields.append(QgsField('q1', QVariant.Double))
        fields.append(QgsField('q3', QVariant.Double))
        fields.append(QgsField('iqr', QVariant.Double))
    elif value_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
        field_type = 'datetime'
        fields.append(QgsField('count', QVariant.Int))
        fields.append(QgsField('unique', QVariant.Int))
        fields.append(QgsField('empty', QVariant.Int))
        fields.append(QgsField('filled', QVariant.Int))
        # keep same data type for these fields
        addField('min')
        addField('max')
    else:
        field_type = 'string'
        fields.append(QgsField('count', QVariant.Int))
        fields.append(QgsField('unique', QVariant.Int))
        fields.append(QgsField('empty', QVariant.Int))
        fields.append(QgsField('filled', QVariant.Int))
        # keep same data type for these fields
        addField('min')
        addField('max')
        fields.append(QgsField('min_length', QVariant.Int))
        fields.append(QgsField('max_length', QVariant.Int))
        fields.append(QgsField('mean_length', QVariant.Double))

    request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry)
    if value_field is not None:
        attrs = [value_field_index]
    else:
        attrs = []
    attrs.extend(category_field_indexes)
    request.setSubsetOfAttributes(attrs)
    features = source.getFeatures(
        request, QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)
    # the first half of the progress bar covers reading the features
    total = 50.0 / source.featureCount() if source.featureCount() else 0
    if field_type == 'none':
        values = defaultdict(int)
    else:
        values = defaultdict(list)
    for current, feat in enumerate(features):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(current * total))
        attrs = feat.attributes()
        cat = tuple([attrs[c] for c in category_field_indexes])
        if field_type == 'none':
            values[cat] += 1
            continue
        if field_type == 'numeric':
            if attrs[value_field_index] == NULL:
                continue
            else:
                value = float(attrs[value_field_index])
        elif field_type == 'string':
            if attrs[value_field_index] == NULL:
                value = ''
            else:
                value = str(attrs[value_field_index])
        elif attrs[value_field_index] == NULL:
            # datetime field with a NULL value
            value = NULL
        else:
            value = attrs[value_field_index]
        values[cat].append(value)

    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           fields, QgsWkbTypes.NoGeometry,
                                           QgsCoordinateReferenceSystem())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    if field_type == 'none':
        self.saveCounts(values, sink, feedback)
    elif field_type == 'numeric':
        self.calcNumericStats(values, sink, feedback)
    elif field_type == 'datetime':
        self.calcDateTimeStats(values, sink, feedback)
    else:
        self.calcStringStats(values, sink, feedback)
    return {self.OUTPUT: dest_id}
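# processAlgorithm() above buckets each feature's value by its tuple of
# category attributes, then aggregates one output row per bucket. The core
# pattern without the QGIS API (a sketch; the statistic names mirror the
# numeric output fields above):
from collections import defaultdict
from statistics import mean, median


def stats_by_category(records):
    """records: iterable of (category_tuple, numeric_value) pairs."""
    values = defaultdict(list)  # one bucket of values per category tuple
    for cat, value in records:
        values[cat].append(value)
    for cat, vals in values.items():
        yield cat, {
            'count': len(vals),
            'unique': len(set(vals)),
            'min': min(vals),
            'max': max(vals),
            'range': max(vals) - min(vals),
            'sum': sum(vals),
            'mean': mean(vals),
            'median': median(vals),
        }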
# -*- coding: utf-8 -*-
from collections import defaultdict

from . import mapping


def none_factory():
    return None


default_translate_table = defaultdict(none_factory, mapping.translate_table)


def fold(unicode_string, replacement=u''):
    """Fold unicode_string to ASCII.

    Unmapped characters are replaced with the empty string by default,
    or with `replacement` if provided. All astral plane characters are
    always removed, even if a replacement is provided.
    """
    if unicode_string is None:
        return u''

    if not isinstance(unicode_string, str):
        raise TypeError('unicode_string must be a str')

    if not isinstance(replacement, str):
        raise TypeError('replacement must be a str')

    # Completion sketch: the original body is truncated here. This assumes
    # mapping.translate_table maps BMP codepoints to ASCII strings; ASCII
    # passes through untouched, astral plane codepoints are always dropped,
    # and unmapped BMP codepoints become `replacement`.
    def _fold_char(char):
        codepoint = ord(char)
        if codepoint < 128:
            return char
        if codepoint > 0xFFFF:
            return u''
        mapped = default_translate_table[codepoint]
        return mapped if mapped is not None else replacement

    return u''.join(_fold_char(c) for c in unicode_string)
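# A usage sketch for fold(); the exact outputs depend on what
# mapping.translate_table contains, so the results shown are assumptions
# for a typical Latin-to-ASCII mapping:
if __name__ == '__main__':
    print(fold(u'Crème Brûlée'))              # e.g. u'Creme Brulee'
    print(fold(u'a♠b', replacement=u'?'))     # u'a?b' if '♠' is unmapped
    print(fold(None))                         # u'' (None folds to empty)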
def __init__(self, chain, default_registry, private_key_bin, transport,
             discovery, config):
    if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32:
        raise ValueError('invalid private_key')

    invalid_timeout = (
        config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or
        config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX
    )
    if invalid_timeout:
        raise ValueError('settle_timeout must be in range [{}, {}]'.format(
            NETTINGCHANNEL_SETTLE_TIMEOUT_MIN, NETTINGCHANNEL_SETTLE_TIMEOUT_MAX
        ))

    self.token_to_channelgraph = dict()
    self.tokens_to_connectionmanagers = dict()
    self.manager_to_token = dict()
    self.swapkey_to_tokenswap = dict()
    self.swapkey_to_greenlettask = dict()

    self.identifier_to_statemanagers = defaultdict(list)
    self.identifier_to_results = defaultdict(list)

    # This is a map from a hashlock to a list of channels, the same
    # hashlock can be used in more than one token (for tokenswaps), a
    # channel should be removed from this list only when the lock is
    # released/withdrawn but not when the secret is registered.
    self.token_to_hashlock_to_channels = defaultdict(lambda: defaultdict(list))

    self.chain = chain
    self.default_registry = default_registry
    self.config = config
    self.privkey = private_key_bin
    self.address = privatekey_to_address(private_key_bin)

    endpoint_registration_event = gevent.spawn(
        discovery.register,
        self.address,
        config['external_ip'],
        config['external_port'],
    )
    endpoint_registration_event.link_exception(endpoint_registry_exception_handler)

    self.private_key = PrivateKey(private_key_bin)
    self.pubkey = self.private_key.public_key.format(compressed=False)
    self.protocol = RaidenProtocol(
        transport,
        discovery,
        self,
        config['protocol']['retry_interval'],
        config['protocol']['retries_before_backoff'],
        config['protocol']['nat_keepalive_retries'],
        config['protocol']['nat_keepalive_timeout'],
        config['protocol']['nat_invitation_timeout'],
    )

    # TODO: remove this cyclic dependency
    transport.protocol = self.protocol

    self.message_handler = RaidenMessageHandler(self)
    self.state_machine_event_handler = StateMachineEventHandler(self)
    self.blockchain_events = BlockchainEvents()
    self.greenlet_task_dispatcher = GreenletTasksDispatcher()
    self.on_message = self.message_handler.on_message
    self.alarm = AlarmTask(chain)
    self.shutdown_timeout = config['shutdown_timeout']
    self._block_number = None

    self.transaction_log = StateChangeLog(
        storage_instance=StateChangeLogSQLiteBackend(
            database_path=config['database_path']
        )
    )

    if config['database_path'] != ':memory:':
        self.database_dir = os.path.dirname(config['database_path'])
        self.lock_file = os.path.join(self.database_dir, '.lock')
        self.snapshot_dir = os.path.join(self.database_dir, 'snapshots')
        self.serialization_file = os.path.join(self.snapshot_dir, 'data.pickle')

        if not os.path.exists(self.snapshot_dir):
            os.makedirs(self.snapshot_dir)

        # Prevent concurrent access to the same db
        self.db_lock = filelock.FileLock(self.lock_file)
    else:
        self.database_dir = None
        self.lock_file = None
        self.snapshot_dir = None
        self.serialization_file = None
        self.db_lock = None

    # If the endpoint registration fails, the node will quit; this must
    # finish before starting the protocol.
    endpoint_registration_event.join()

    self.start()
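# token_to_hashlock_to_channels above is a two-level autovivifying map:
# indexing with a new token creates an inner defaultdict(list), and indexing
# that with a new hashlock creates an empty list. A stand-alone sketch
# (identifiers hypothetical):
from collections import defaultdict

channels = defaultdict(lambda: defaultdict(list))
channels['token_a']['lock_1'].append('channel_42')  # no KeyError at any level
assert channels['token_a']['lock_1'] == ['channel_42']
assert channels['token_b'] == {}  # note: even a read inserts the inner dict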
import json
import time
from collections import defaultdict

import MySQLdb as mdb


def tree():
    # Assumed helper (not shown in the original): the classic autovivifying
    # "tree" built from defaultdict, so title[a][b] works without KeyErrors.
    return defaultdict(tree)


def main():
    is_title = False
    last = defaultdict(int)
    while True:
        con = None
        con2 = None
        try:
            con = mdb.connect('10.75.26.127', 'root', '123456', 'firehose')
            con2 = mdb.connect('10.75.26.127', 'root', '123456', 'monitor')
            cur = con.cursor()
            cur2 = con2.cursor()
            values = {}
            values['lines'] = defaultdict(int)
            values['lines']['part num'] = defaultdict(int)
            cur_time = int(time.time())
            cur_time = cur_time - cur_time % 300
            for i in range(7):
                cur.execute(
                    "select `today_sent` from `session` where username=3855001400 and partnum=%d" % i)
                rows = cur.fetchall()
                if len(rows) <= 0:
                    print "can not select from sql"
                    continue  # nothing to read for this partition
                row = rows[0]
                acc_value = row[0]
                if acc_value < last[i]:
                    # the upstream counter was reset; restart the delta from zero
                    last[i] = 0
                values['lines']['part num']["part_%d" % i] = acc_value - last[i]
                values['lines']['total'] += acc_value - last[i]
                last[i] = acc_value
            cur2.execute(
                "insert into `kpis` (`appname`,`timestamp`,`kpi_value`) values('app-firehose', %d, '%s')"
                % (cur_time, json.dumps(values)))
            print json.dumps(values)
            if not is_title:
                title = tree()
                for kpi_name in values:
                    for dim_name in values[kpi_name]:
                        if dim_name != "total":
                            title[kpi_name][dim_name] = values[kpi_name][dim_name].keys()
                        else:
                            title[kpi_name]["NONE"] = []
                if len(title) <= 0:
                    continue
                title["data_path"] = 'mysql'
                title_json = json.dumps(title)
                cur2.execute(
                    "select `appname` from `title` where `appname`='%s'" % 'app-firehose')
                rows = cur2.fetchall()
                if len(rows) <= 0:
                    cur2.execute(
                        "insert into `title`(`appname`, `kpi_json`) values('%s', '%s')"
                        % ('app-firehose', title_json))
                else:
                    cur2.execute(
                        "update `title` set `kpi_json` = '%s' where `appname`='%s'"
                        % (title_json, 'app-firehose'))
                is_title = True  # only write the title once per process
            con.commit()
            con2.commit()
            time.sleep(300)
        except mdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        finally:
            # completion sketch: release both connections each cycle so the
            # next iteration can reopen them cleanly
            if con:
                con.close()
            if con2:
                con2.close()
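# The loop above derives per-interval increments from cumulative counters.
# A minimal, self-contained sketch of that delta pattern (names hypothetical):
from collections import defaultdict


def deltas(samples):
    """Yield (key, increment) for a stream of (key, cumulative_total) pairs."""
    last = defaultdict(int)  # missing keys start at 0
    for key, total in samples:
        if total < last[key]:  # the upstream counter was reset
            last[key] = 0
        yield key, total - last[key]
        last[key] = total

# e.g. list(deltas([('a', 3), ('a', 7), ('a', 2)])) == [('a', 3), ('a', 4), ('a', 2)]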
def build_graph(self):
    logging.info("building graph")
    self.graph = {
        "nodes": [],
        "links": [],
        "terms": [],
        "people": [],
        "documents": []
    }
    global_clusters_index = {}
    index = 0
    for time in range(self.num_time_slides):
        cluster_weight_given_time = np.zeros(self.num_global_clusters)
        document_count = 0.
        for y in self.time_slides[time]:
            document_count += len(self.document_list_given_time[y])
        document_count /= len(self.time_slides[time])
        for i, cluster in enumerate(self.global_clusters[time]):
            for c in cluster:
                for w in self.local_clusters[time][c]:
                    cluster_weight_given_time[i] += self.term_freq_given_time[time][w]
        cluster_weight_sum_given_time = sum(cluster_weight_given_time)
        if cluster_weight_sum_given_time == 0:
            cluster_weight_sum_given_time = 1
        for i, cluster in enumerate(self.global_clusters[time]):
            terms = []
            for c in cluster:
                for w in self.local_clusters[time][c]:
                    terms.append(w)
            if len(terms) == 0:
                continue
            sorted_terms = sorted(terms, key=lambda t: self.term_freq[t], reverse=True)
            sorted_terms_given_time = sorted(
                terms,
                key=lambda t: self.term_freq_given_time[time][t],
                reverse=True)
            self.graph["nodes"].append({
                "key": [{
                    "term": self.term_list[k],
                    "w": int(self.term_freq_given_time[time][k])
                } for k in sorted_terms_given_time],
                "name": self.term_list[sorted_terms_given_time[0]],
                "pos": time,
                "w": cluster_weight_given_time[i] / cluster_weight_sum_given_time * (document_count + 1),
                "n": cluster_weight_given_time[i] / cluster_weight_sum_given_time,
                "cluster": i
            })
            global_clusters_index[str(time) + "-" + str(i)] = index
            index += 1

    # calculate similarity between clusters of adjacent time slides
    global_clusters_sim_target = defaultdict(dict)
    global_clusters_sim_source = defaultdict(dict)
    for time in range(1, self.num_time_slides):
        for i1, c1 in enumerate(self.global_clusters[time]):
            key1 = str(time) + "-" + str(i1)
            if key1 in global_clusters_index:
                terms1 = []
                for c in c1:
                    for w in self.local_clusters[time][c]:
                        terms1.append(w)
                for i2, c2 in enumerate(self.global_clusters[time - 1]):
                    key2 = str(time - 1) + "-" + str(i2)
                    if key2 in global_clusters_index:
                        terms2 = []
                        for c in c2:
                            # c2 comes from the previous time slide, so its
                            # terms are looked up at time - 1
                            for w in self.local_clusters[time - 1][c]:
                                terms2.append(w)
                        sim = common_word_with_weight(terms1, terms2, self.term_freq)
                        if sim > 0:
                            global_clusters_sim_target[key1][key2] = sim
                            global_clusters_sim_source[key2][key1] = sim
    #for i, c in enumerate(self.global_clusters[time]):
    #    key1 = str(time)+"-"+str(i)
    #    key2 = str(time-1)+"-"+str(i)
    #    if key1 in global_clusters_index and key2 in global_clusters_index:
    #        global_clusters_sim_target[key1][key2] = 1.
    #        global_clusters_sim_source[key2][key1] = 1.
    for key1 in global_clusters_sim_target:
        if key1 in global_clusters_index:
            m1 = sum(global_clusters_sim_target[key1].values())
            for key2 in global_clusters_sim_target[key1]:
                if key2 in global_clusters_index:
                    m2 = sum(global_clusters_sim_source[key2].values())
                    self.graph["links"].append({
                        "source": int(global_clusters_index[key2]),
                        "target": int(global_clusters_index[key1]),
                        "w1": global_clusters_sim_target[key1][key2] / float(m1),
                        "w2": global_clusters_sim_target[key1][key2] / float(m2)
                    })

    # term frequency
    sorted_terms = sorted(self.term_list,
                          key=lambda t: self.term_freq[self.term_index[t]],
                          reverse=True)
    for t in sorted_terms:
        term_index = self.term_index[t]
        term_year = defaultdict(list)
        for d in self.reverse_term_dict[term_index]:
            term_year[self.document_list[d].stat[0].value].append(d)
        sorted_term_year = sorted(term_year.items(), key=lambda item: item[0])
        if len(sorted_term_year) == 0:
            continue
        ty = {}
        for i in range(self.start_time + 1, self.end_time):
            ty[i] = 0.0
        for c in term_year:
            ty[c] = len(term_year[c])
        start_point = sorted_term_year[0][0]
        start_time = self.get_time_slide(start_point)
        start_cluster = self.global_cluster_labels[start_time][
            self.local_cluster_labels[start_time][term_index]]
        start_node = global_clusters_index[str(start_time) + "-" + str(start_cluster)]
        item = {
            "t": t,
            "idx": int(term_index),
            "freq": int(self.term_freq[term_index]),
            "dist": [0 for i in range(self.num_time_slides)],
            "year": [{"y": j, "d": ty[j]} for j in ty],
            "cluster": [0 for i in range(self.num_time_slides)],
            "node": [0 for i in range(self.num_time_slides)],
            "doc": [int(d) for d in self.reverse_term_dict[term_index]],
            "first": [{
                "p": p,
                "y": self.term_first_given_person[term_index][p]
            } for p in self.term_first_given_person[term_index]],
            "start": {
                "year": int(start_point),
                "time": int(start_time),
                "cluster": int(start_cluster),
                "node": int(start_node)
            }
        }
        for time in range(self.num_time_slides):
            item["dist"][time] = int(self.term_freq_given_time[time][term_index])
            local_c = self.local_cluster_labels[time][term_index]
            item["cluster"][time] = int(self.global_cluster_labels[time][local_c])
            item["node"][time] = int(
                global_clusters_index[str(time) + "-" + str(item["cluster"][time])])
        self.graph["terms"].append(item)

    # people
    for author in self.author_result:
        self.graph["people"].append({
            "id": author.id,
            "name": author.title,
            #"hindex": author.h_index,
            #"pub_count": author.pub_count,
            #"cite": author.citation_no
        })

    # documents
    for i, doc in enumerate(self.document_list):
        self.graph["documents"].append({
            "idx": i,
            "id": int(doc.id),
            "title": doc.title,
            "year": int(doc.stat[0].value),
            #"jconf": doc.jconf_name,
            #"abs": doc.abs,
            #"cite": int(doc.stat[2].value)
        })
        #, "authors": doc.author_ids, "topic": doc.topic})

    # time slides
    self.graph["time_slides"] = self.time_slides
    return self.graph
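# build_graph() above normalizes pairwise similarities into forward/backward
# edge weights using a pair of defaultdict(dict) indexes. A stripped-down
# sketch of that pattern (names hypothetical):
from collections import defaultdict

sim_target = defaultdict(dict)  # incoming similarities, keyed by target
sim_source = defaultdict(dict)  # the same edges, keyed by source


def add_edge(source, target, sim):
    sim_target[target][source] = sim
    sim_source[source][target] = sim


def edge_weights(source, target):
    """Weight normalized over the target's in-edges and the source's out-edges."""
    w1 = sim_target[target][source] / sum(sim_target[target].values())
    w2 = sim_target[target][source] / sum(sim_source[source].values())
    return w1, w2

add_edge('2-0', '3-1', 0.6)
add_edge('2-1', '3-1', 0.2)
w1, w2 = edge_weights('2-0', '3-1')  # w1 == 0.6 / 0.8, w2 == 0.6 / 0.6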
import collections


def train(features):
    # Laplace-style smoothing: every feature starts at 1, so features never
    # seen during training still get a nonzero count.
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model
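# A usage sketch for train() above: counts include the implicit 1 from the
# defaultdict factory, and merely looking up a key inserts it.
model = train(['cat', 'dog', 'cat'])
assert model['cat'] == 3   # 1 (factory default) + 2 observations
assert model['dog'] == 2
assert model['fish'] == 1  # unseen, but still nonzero (and now stored)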