def gen_cls_sk(sk_dir, cls):
    mtds = collect_decls(cls, "mtds")
    flds = collect_decls(cls, "flds")
    s_flds = filter(op.attrgetter("is_static"), flds)
    if cls.is_class:
        if not mtds and not s_flds: return None
    else:  # cls.is_itf or cls.is_enum
        if not s_flds: return None

    cname = util.sanitize_ty(cls.name)
    buf = cStringIO.StringIO()
    buf.write("package {};\n".format(cname))
    buf.write(_const)

    # static fields
    buf.write('\n'.join(map(trans_fld, s_flds)))
    if len(s_flds) > 0: buf.write('\n')

    # migrating static fields' initialization to <clinit>
    for fld in ifilter(op.attrgetter("init"), s_flds):
        if not fld.init.has_call and not fld.init.has_str and not fld.is_aliasing:
            continue
        # retrieve (or declare) <clinit>
        clinit = fld.clazz.get_or_declare_clinit()
        if clinit not in mtds: mtds.append(clinit)
        # add assignment
        assign = st.gen_S_assign(exp.gen_E_id(fld.name), fld.init)
        clinit.body.append(assign)

    # accessors for static fields
    for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
        fname = fld.name
        accessor = trans_fname(fld.clazz.name, fname, True)
        buf.write("""
{0} {1}() {{
  return {2};
}}
""".format(trans_ty(fld.typ), accessor, fname))

    # methods
    clinits, mtds = util.partition(lambda m: m.is_clinit, mtds)
    inits, mtds = util.partition(lambda m: m.is_init, mtds)
    # <init>/<clinit> should be dumped out in any case
    buf.write('\n'.join(map(to_func, clinits)))
    buf.write('\n'.join(map(to_func, inits)))
    for mtd in mtds:
        # interface won't have method bodies
        if mtd.clazz.is_itf: continue
        buf.write(to_func(mtd) + os.linesep)

    cls_sk = cname + ".sk"
    with open(os.path.join(sk_dir, cls_sk), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    return cls_sk
def gen_cls_sk(sk_dir, smpls, cls):
    mtds = collect_decls(cls, "mtds")
    flds = collect_decls(cls, "flds")
    s_flds = filter(op.attrgetter("is_static"), flds)
    if cls.is_class:
        if not mtds and not s_flds: return None
    else:  # cls.is_itf or cls.is_enum
        if not s_flds: return None

    cname = util.sanitize_ty(cls.name)
    buf = cStringIO.StringIO()
    buf.write("package {};\n".format(cname))
    buf.write(_const)

    # static fields
    buf.write('\n'.join(map(trans_fld, s_flds)))
    if len(s_flds) > 0: buf.write('\n')

    # migrating static fields' initialization to <clinit>
    for fld in ifilter(op.attrgetter("init"), s_flds):
        if not fld.init.has_call and not fld.init.has_str and not fld.is_aliasing:
            continue
        # retrieve (or declare) <clinit>
        clinit = fld.clazz.get_or_declare_clinit()
        if clinit not in mtds: mtds.append(clinit)
        # add assignment
        assign = st.gen_S_assign(exp.gen_E_id(fld.name), fld.init)
        clinit.body.append(assign)

    # accessors for static fields
    for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
        fname = fld.name
        accessor = trans_fname(fld.clazz.name, fname, True)
        buf.write("""
{0} {1}() {{
  return {2};
}}
""".format(trans_ty(fld.typ), accessor, fname))

    # methods
    clinits, mtds = util.partition(lambda m: m.is_clinit, mtds)
    inits, mtds = util.partition(lambda m: m.is_init, mtds)
    # <init>/<clinit> should be dumped out in any case
    buf.write('\n'.join(map(partial(to_func, smpls), clinits)))
    buf.write('\n'.join(map(partial(to_func, smpls), inits)))
    for mtd in mtds:
        # interface won't have method bodies
        if mtd.clazz.is_itf: continue
        buf.write(to_func(smpls, mtd) + os.linesep)

    cls_sk = cname + ".sk"
    with open(os.path.join(sk_dir, cls_sk), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    return cls_sk
def _precompute_counts(self):
    """Precompute mappings from (L/M) to (L/M-good configs) for discrete L and M.

    Returns:
        A table TBL such that TBL[L][M] returns the list of configurations
        that are L-M-(M-1) acceptable, i.e. you get the _names_ of all
        configurations worse than (M-1) but still within M times untyped.

    Running time:
        L passes over all configurations, so L*(2**N) where N = num modules
    Space:
        stores at most all configurations in each row of the table, L * (2**N)
    """
    LM_table = []
    for L in self.Lvals:
        row = [[]]  # Skip the 0 step
        unsorted_configs = self.all_configurations()
        for M in range(1, self.Mmax+1):
            good_iter, rest_iter = util.partition(unsorted_configs,
                                                  self._curryLM_acceptable(L, M))
            row.append(list(good_iter))
            unsorted_configs = rest_iter
        LM_table.append(row)
    return LM_table
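# The snippet above calls `util.partition(iterable, pred)` with the iterable
# first and expects a lazy pair (matching, rest) whose second half can be
# re-partitioned on the next pass. A minimal sketch of a helper with that
# contract (an assumption; other snippets in this section use different
# argument orders and semantics for their own `partition`):
import itertools

def partition(iterable, pred):
    """Lazily split `iterable` into (matching, rest) according to `pred`."""
    left, right = itertools.tee(iterable)
    return (x for x in left if pred(x)), (x for x in right if not pred(x))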
def handle_aop(self, bot, event, arg):
    r"""op all unopped members in the channel.

    Usage: /msg uniko \aop channel
    channel -- channel name (as seen from the user)
    """
    arg = irclib.irc_lower(arg.strip())
    if not self.check_channel(bot, arg):
        return False
    network = bot.network
    nickname = irclib.nm_to_n(event.source() or '')
    # TODO: asynchronous?
    for t_network in self.networks:
        if t_network == network:
            continue
        t_channel = t_network.encode(self.channels[t_network])[0]
        t_channel_obj = t_network.get_channel(t_channel)
        t_bot = t_network.get_oper(t_channel)
        if not t_bot:
            continue
        members = set(t_channel_obj.users())
        members = members.difference(t_channel_obj.opers())
        for _ in util.partition(members.__iter__(), 4):  # XXX
            mode_string = b'+' + b'o' * len(_) + b' ' + b' '.join(_)
            mode_string = t_network.decode(mode_string)[0]
            t_bot.push_message(Message(
                command='mode',
                arguments=(self.channels[t_network], mode_string)))
        # decode() returns a tuple, like encode() above, so subscript its result
        message = t_network.decode(b' '.join(members))[0]
        bot.push_message(Message(
            command='privmsg',
            arguments=(network.decode(nickname)[0], message)))
    return True
def iterate(cself, svm, classes):
    # closure defined inside fit(): self, K, bs, and qp are captured
    # from the enclosing scope
    cself.mention('Training SVM...')
    D = spdiag(classes)
    qp.update_H(D * K * D)
    qp.update_Aeq(classes.T)
    alphas, obj = qp.solve(cself.verbose)

    # Construct SVM from solution
    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._X = bs.instances
    svm._y = classes
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    cself.mention('Recomputing classes...')
    p_conf = svm._predictions[-bs.L_p:]
    pos_classes = np.vstack([_update_classes(part)
                             for part in partition(p_conf, bs.pos_groups)])
    new_classes = np.vstack([-np.ones((bs.L_n, 1)), pos_classes])

    class_changes = round(np.sum(np.abs(classes - new_classes) / 2))
    cself.mention('Class Changes: %d' % class_changes)
    if class_changes == 0:
        return None, svm

    return {'svm': svm, 'classes': new_classes}, None
def seek_split_rule(self, criterion='gini'):
    if criterion == 'gini':
        metric = gini
        eval_gain = purity_gain
    elif criterion == 'entropy':
        metric = entropy
        eval_gain = info_gain
    else:
        raise ValueError('%s is not a valid partition criterion' % criterion)

    best_gain = 0
    current_metric_val = metric(self.X)
    for i in self.feat_indices:
        # Extract unique values from dataset in a given feature/column.
        values = set([x[i] for x in self.X])
        for val in values:
            rule = SplitRule(self.column_names[i], i, val)
            # Partition the current dataset and check if everything landed on
            # one side. If so, this is a bad partition, don't consider it.
            true_set, false_set = partition(self.X, rule)
            if len(true_set) == 0 or len(false_set) == 0:
                continue
            gain = eval_gain([true_set, false_set], current_metric_val, len(self.X))
            if gain >= best_gain:
                best_gain, self.rule = gain, rule
def save_all(users, binsize=1000):
    first, users = peek(iter(users))
    db = first.db
    for chunk in partition(users, binsize):
        batch = leveldb.WriteBatch()
        for user in takewhile(operator.truth, chunk):
            user.save()
        db.Write(batch, sync=True)
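# `save_all` strips trailing padding with `takewhile(operator.truth, chunk)`,
# which suggests its `partition` is the classic grouper recipe: fixed-size
# chunks, with the last chunk padded by a falsy fill value. A minimal sketch
# under that assumption (Python 2 spelling, matching the snippet above):
from itertools import izip_longest  # itertools.zip_longest on Python 3

def partition(iterable, n, fillvalue=None):
    """Yield tuples of length n; the last tuple is padded with fillvalue."""
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)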
def selectKth(ar, k, left, right):
    idx = selectPivotIndex(ar, left, right)
    pivotIndex = partition(ar, left, right, idx)
    if (left + k - 1) == pivotIndex:
        return pivotIndex
    if left + k - 1 < pivotIndex:
        return selectKth(ar, k, left, pivotIndex - 1)
    else:
        return selectKth(ar, k - (pivotIndex - left + 1), pivotIndex + 1, right)
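# selectKth (and qSort / selectMedian below) assume an in-place
# `partition(ar, left, right, pivotIndex)` that returns the pivot's final
# index. A minimal Lomuto-style sketch consistent with that signature:
def partition(ar, left, right, pivotIndex):
    """Partition ar[left..right] around ar[pivotIndex]; return its final position."""
    pivot = ar[pivotIndex]
    ar[pivotIndex], ar[right] = ar[right], ar[pivotIndex]  # park pivot at the end
    store = left
    for i in range(left, right):
        if ar[i] < pivot:
            ar[i], ar[store] = ar[store], ar[i]
            store += 1
    ar[store], ar[right] = ar[right], ar[store]  # move pivot into place
    return store

# Example: with a trivial selectPivotIndex (e.g. one returning `right`),
# selectKth([9, 1, 8, 2], 2, 0, 3) keeps partitioning until the 2nd-smallest
# element (here 2) sits at its sorted position, and returns that index.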
def split(self):
    self.seek_split_rule()
    if self.rule is None:
        return None
    true_set, false_set = partition(self.X, self.rule)
    self.true_branch = TreeNode(self.column_names, X=true_set)
    self.false_branch = TreeNode(self.column_names, X=false_set)
    return self.true_branch, self.false_branch
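# Both tree methods above rely on a rule-driven `partition(rows, rule)` that
# returns the matching rows and the rest as two lists. A minimal sketch,
# assuming SplitRule exposes a `match(row)` predicate (its exact interface is
# not shown in this section):
def partition(rows, rule):
    """Split rows into (true_set, false_set) by whether `rule` matches each row."""
    true_set, false_set = [], []
    for row in rows:
        (true_set if rule.match(row) else false_set).append(row)
    return true_set, false_set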
def english_probability(text):
    """
    Returns a float representing the likelihood that the given text is
    a plaintext written in English. Range: (0.0 - 1.0), higher is better.
    """
    # Ignore whitespace (revisit this later).
    text = text.upper()
    letters, other = partition(lambda c: c in ENGLISH_FREQUENCIES, text)
    if not letters:
        return 0.0

    # Expect roughly 15% of text to be spaces.
    spaces, other = partition(lambda c: c.isspace(), other)
    space_error = abs(float(len(spaces)) / len(text) - 0.15)

    # As a rough approximation, expect 2% of characters to be punctuation.
    punc_error = abs(float(len(other)) / len(text) - 0.02)

    counts = Counter(text)
    letter_error = 0.0
    for c, target_freq in ENGLISH_FREQUENCIES.items():
        letter_error += (
            target_freq * abs(float(counts.get(c, 0)) / len(letters) - target_freq))
    return max(1.0 - (punc_error + letter_error + space_error), 0.0)
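# english_probability calls partition(pred, iterable) and takes len() of both
# results, so this variant is predicate-first and returns lists. A sketch plus
# a small worked call (the toy frequency table is illustrative only; a real
# ENGLISH_FREQUENCIES maps all of 'A'..'Z' to relative frequencies):
def partition(pred, iterable):
    matching = [x for x in iterable if pred(x)]
    rest = [x for x in iterable if not pred(x)]
    return matching, rest

ENGLISH_FREQUENCIES = {'E': 0.127, 'T': 0.091, 'A': 0.082}
letters, other = partition(lambda c: c in ENGLISH_FREQUENCIES, "EAT AT TEN.")
# letters == ['E', 'A', 'T', 'A', 'T', 'T', 'E'];  other == [' ', ' ', 'N', '.']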
def qSort(A, left, right):
    minSize = 3
    if left < right:
        pivotIndex = selectPivotIndex(A, left, right)
        pivotIndex = partition(A, left, right, pivotIndex)
        if pivotIndex - 1 - left <= minSize:
            insertion(A, left, pivotIndex - 1)
        else:
            qSort(A, left, pivotIndex - 1)
        if right - pivotIndex - 1 <= minSize:
            insertion(A, pivotIndex + 1, right)
        else:
            qSort(A, pivotIndex + 1, right)
def selectMedian(ar, left, right):
    k = (right - left + 1) // 2
    while k > 0:
        idx = medianOfMedians(ar, left, right, 1)
        pivotIndex = partition(ar, left, right, idx)
        p = left + k
        if p == pivotIndex:
            return pivotIndex
        elif p < pivotIndex:
            right = pivotIndex - 1
        else:
            k = k - (pivotIndex - left + 1)
            left = pivotIndex + 1
    return left
def hw_to_center(a):
    """Convert (top left, width, height) bounding box to (center, width, height)
    bounding box.

    Args:
        param1 (arr): [xmin, ymin, width, height] where (xmin, ymin) represents
            the top left of the bounding box

    Returns:
        arr: [center_x, center_y, width, height]
    """
    bbs = util.partition(a, 4)
    bbs = [[bb[0] + bb[2] / 2, bb[1] + bb[3] / 2, bb[2], bb[3]] for bb in bbs]
    return np.concatenate(bbs)
def main(a_few_tags=False):
    if a_few_tags:
        A_FEW_TAGS = ['contemporary gospel']  # ,'yellow','a blues song form','country influences',"post rock","1970s"]
        Combiner(only_these_tags=set(A_FEW_TAGS), production_run=True).fill_in_zeros()
    else:
        # We run out of memory trying to do all tags at once, so just do 2000 at a time.
        N_TAGS_PER_ROUND = 2000
        PRODUCTION_RUN = True
        combiner = Combiner(production_run=PRODUCTION_RUN)
        all_tags = list(combiner.only_these_tags)
        tag_groups = util.partition(all_tags, N_TAGS_PER_ROUND)
        overwrite_final_tab_file = True
        for group in tag_groups:
            group_combiner = Combiner(production_run=PRODUCTION_RUN,
                                      only_these_tags=set(group),
                                      overwrite_final_tab_file=overwrite_final_tab_file)
            overwrite_final_tab_file = False  # From now on, we'll just append to the current one.
            group_combiner.fill_in_zeros()
def corners_to_center(a):
    """Convert (top left, bottom right) bounding box to (center, width, height)
    bounding box.

    Args:
        param1 (arr): [xmin, ymin, xmax, ymax] where (xmin, ymin) and (xmax, ymax)
            represent the top left and bottom right corners of the bounding box

    Returns:
        arr: [center_x, center_y, width, height]
    """
    bbs = util.partition(a, 4)
    bbs = [[(bb[0] + bb[2]) / 2, (bb[1] + bb[3]) / 2,
            int(bb[2] - bb[0]), int(bb[3] - bb[1])] for bb in bbs]
    return np.concatenate(bbs)
def center_to_hw(a):
    """Convert (center, width, height) bounding box to (top left, width, height)
    bounding box.

    Args:
        param1 (arr): [center_x, center_y, width, height] where (center_x, center_y)
            represents the center of the bounding box

    Returns:
        arr: [xmin, ymin, width, height]
    """
    bbs = util.partition(a, 4)
    bbs = [[int(bb[0] - bb[2] / 2), int(bb[1] - bb[3] / 2), int(bb[2]), int(bb[3])]
           for bb in bbs]
    return np.concatenate(bbs)
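# The three converters above treat `util.partition(a, 4)` as a fixed-size
# chunker over a flat coordinate array, one chunk per box. A minimal sketch
# under that assumption, with a round-trip check:
import numpy as np

def partition(a, n):
    """Split the flat sequence `a` into consecutive chunks of length n."""
    return [a[i:i + n] for i in range(0, len(a), n)]

flat = np.array([10, 20, 4, 6])  # one box: xmin, ymin, width, height
centers = [[bb[0] + bb[2] / 2, bb[1] + bb[3] / 2, bb[2], bb[3]]
           for bb in partition(flat, 4)]
# centers == [[12.0, 23.0, 4, 6]]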
def notify(self, time, data, isAsync=True):
    # purge expired subscribers
    self.purgeSubscribers()
    payload = [data]  # wrap once, outside the loop
    for sub_time, bucket in self.subscribers.items():
        # in-place partition: entries whose [start, end] window misses the
        # notification time move to the front; entries from `index` on are due
        index = partition(bucket, lambda sub: time < sub[0] or time > sub[1])
        for i in range(index, len(bucket)):
            callback = bucket[i][2]
            if isAsync:
                reactor.callLater(0, callback, payload)
            else:
                callback(payload)
        del bucket[index:]
        if len(bucket) == 0:
            del self.subscribers[sub_time]
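# notify assumes yet another variant: an in-place partition over a list that
# returns the boundary index, in the style of C++ std::partition. A minimal
# (stable) sketch consistent with that usage:
def partition(items, pred):
    """Move items satisfying `pred` to the front of `items` (in place);
    return the index of the first item that does not satisfy it."""
    front = [x for x in items if pred(x)]
    back = [x for x in items if not pred(x)]
    items[:] = front + back
    return len(front)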
def transform_y(self, y, x):
    if not isinstance(y, md.StructuredLabel):
        return y
    for i, label in enumerate(y):
        data, data_type = label
        if (data_type == md.LabelType.BOUNDING_BOX):
            r, c, *_ = x.shape
            boxes = util.partition(data, 4)  # Partition into array of bounding boxes
            masks = [center_to_mask(bb, x) for bb in boxes]  # Create masks from bounding boxes
            trfms = [self.transform_x(mask) for mask in masks]  # Transform masks
            # Convert masks back into bounding boxes, save result
            y[i] = (np.concatenate([mask_to_center(t) for t in trfms]), data_type)
    return y
def gen_type_sk(sk_dir, bases):
    buf = cStringIO.StringIO()
    buf.write("package type;\n")
    buf.write(_const)

    cols, decls = util.partition(lambda c: util.is_collection(c.name), bases)
    decls = filter(lambda c: not util.is_array(c.name), decls)
    itfs, clss = util.partition(op.attrgetter("is_itf"), decls)
    logging.debug("# interface(s): {}".format(len(itfs)))
    logging.debug("# class(es): {}".format(len(clss)))
    # convert interfaces first, then usual classes
    buf.write('\n'.join(util.ffilter(map(to_struct, itfs))))
    buf.write('\n'.join(util.ffilter(map(to_struct, clss))))

    # convert collections at last
    logging.debug("# collection(s): {}".format(len(cols)))
    buf.write('\n'.join(map(col_to_struct, cols)))

    # argument number of methods
    arg_num = map(lambda mtd: len(mtd.params), methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.argNum, ", ".join(map(str, arg_num))))

    # argument types of methods
    def get_args_typ(mtd):
        def get_arg_typ(param): return str(class_lookup(param[0]).id)
        return '{' + ", ".join(map(get_arg_typ, mtd.params)) + '}'
    args_typ = map(get_args_typ, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id, int idx) {{
  return _{0}[id][idx];
}}
""".format(C.typ.argType, ", ".join(args_typ)))

    # return type of methods
    def get_ret_typ(mtd):
        cls = class_lookup(mtd.typ)
        if cls: return cls.id
        else: return -1
    ret_typ = map(get_ret_typ, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.retType, ", ".join(map(str, ret_typ))))

    # belonging class of methods
    belongs = map(lambda mtd: mtd.clazz.id, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.belongsTo, ", ".join(map(str, belongs))))

    # sub type relations
    subcls = map(lambda cls_i:
        '{' + ", ".join(map(lambda cls_j: str(cls_i <= cls_j).lower(), classes())) + '}',
        classes())
    buf.write("""
#define _{0} {{ {1} }}
bit {0}(int i, int j) {{
  return _{0}[i][j];
}}
""".format(C.typ.subcls, ", ".join(subcls)))

    ## sub type relations
    #subcls = []
    #for cls_i in classes():
    #  row = []
    #  for cls_j in classes():
    #    row.append(int(cls_i <= cls_j))
    #  subcls.append(row)

    ## sub type relations in yale format
    #_, IA, JA = util.yale_format(subcls)
    #li, lj = len(IA), len(JA)
    #si = ", ".join(map(str, IA))
    #sj = ", ".join(map(str, JA))
    #buf.write("""
    ##define _iA {{ {si} }}
    ##define _jA {{ {sj} }}
    #int iA(int i) {{
    #  return _iA[i];
    #}}
    #int jA(int j) {{
    #  return _jA[j];
    #}}
    #bit subcls(int i, int j) {{
    #  int col_i = iA(i);
    #  int col_j = iA(i+1);
    #  for (int col = col_i; col < col_j; col++) {{
    #    if (j == jA(col)) return true;
    #  }}
    #  return false;
    #}}
    #""".format(**locals()))

    with open(os.path.join(sk_dir, "type.sk"), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    buf.close()
def to_struct(cls):
    # make mappings from static fields to corresponding accessors
    def gen_s_flds_accessors(cls):
        s_flds = filter(op.attrgetter("is_static"), cls.flds)
        global _s_flds
        for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
            cname = fld.clazz.name
            fid = '.'.join([cname, fld.name])
            fname = unicode(repr(fld))
            logging.debug("{} => {}".format(fid, fname))
            _s_flds[fid] = fname

    cname = util.sanitize_ty(cls.name)
    global _ty
    # if this is an interface, merge this into another family of classes
    # as long as classes that implement this interface are in the same family
    if cls.is_itf:
        # interface may have static constants
        gen_s_flds_accessors(cls)
        subss = util.flatten_classes(cls.subs, "subs")
        bases = util.rm_dup(map(lambda sub: find_base(sub), subss))
        # filter out interfaces that extend other interfaces, e.g., Action
        base_clss, _ = util.partition(op.attrgetter("is_class"), bases)
        if not base_clss:
            logging.debug("no implementer of {}".format(cname))
        elif len(base_clss) > 1:
            logging.debug("ambiguous inheritance of {}: {}".format(cname, base_clss))
        else:  # len(base_clss) == 1
            base = base_clss[0]
            base_name = base.name
            logging.debug("{} => {}".format(cname, base_name))
            _ty[cname] = base_name
            if cls.is_inner:  # to handle inner interface w/ outer class name
                logging.debug("{} => {}".format(repr(cls), base_name))
                _ty[unicode(repr(cls))] = base_name
        return ''

    # if this is the base class having subclasses, make a virtual struct first
    if cls.subs:
        cls = to_v_struct(cls)
        cname = cls.name

    # cls can be modified above, thus generate static fields accessors here
    gen_s_flds_accessors(cls)

    # for unique class numbering, add an identity mapping
    if cname not in _ty:
        _ty[cname] = cname

    buf = cStringIO.StringIO()
    buf.write("struct " + cname + " {\n  int hash;\n")
    # to avoid static fields, which will be bound to a class-representing package
    _, i_flds = util.partition(op.attrgetter("is_static"), cls.flds)
    buf.write('\n'.join(map(trans_fld, i_flds)))
    if len(i_flds) > 0:
        buf.write('\n')
    buf.write("}\n")
    return buf.getvalue()
def to_sk(pgr, sk_dir):
    # clean up result directory
    if os.path.isdir(sk_dir):
        util.clean_dir(sk_dir)
    else:
        os.makedirs(sk_dir)

    # reset global variables so that we can run this encoding phase per demo
    reset()

    # update global constants
    # TODO: conservative analysis of possible length of collections
    # TODO: counting .add() calls or something?
    magic_S = 7
    global _const
    _const = u"""
int S = {}; // length of arrays for Java collections
""".format(magic_S)

    # type.sk
    logging.info("building class hierarchy")
    pgr.consist()
    # merge all classes and interfaces, except for primitive types
    clss, _ = util.partition(lambda c: util.is_class_name(c.name), classes())
    bases = rm_subs(clss)
    gen_type_sk(sk_dir, bases)

    # cls.sk
    cls_sks = []
    for cls in pgr.classes:
        # skip the collections, which will be encoded at type.sk
        if repr(cls).split('_')[0] in C.collections:
            continue
        cls_sk = gen_cls_sk(sk_dir, cls)
        if cls_sk:
            cls_sks.append(cls_sk)

    # log.sk
    gen_log_sk(sk_dir, pgr)

    # main.sk that imports all the other sketch files
    buf = cStringIO.StringIO()

    # --bnd-cbits: the number of bits for integer holes
    bits = max(5, int(math.ceil(math.log(len(methods()), 2))))
    buf.write("pragma options \"--bnd-cbits {}\";\n".format(bits))

    # --bnd-unroll-amnt: the unroll amount for loops
    unroll_amnt = None  # use a default value if not set
    unroll_amnt = magic_S  # TODO: other criteria?
    if unroll_amnt:
        buf.write("pragma options \"--bnd-unroll-amnt {}\";\n".format(unroll_amnt))

    # --bnd-inline-amnt: bounds inlining to n levels of recursion
    inline_amnt = None  # use a default value if not set
    # setting it 1 means there is no recursion
    if inline_amnt:
        buf.write("pragma options \"--bnd-inline-amnt {}\";\n".format(inline_amnt))
    buf.write("pragma options \"--bnd-bound-mode CALLSITE\";\n")

    sks = ["log.sk", "type.sk"] + cls_sks
    for sk in sks:
        buf.write("include \"{}\";\n".format(sk))
    # TODO: make harness (if not exists)
    with open(os.path.join(sk_dir, "main.sk"), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    buf.close()
def toggle(onlights, pos1, pos2):
    turned_off_lights, turned_on_lights = util.partition(
        lambda light: light in onlights,
        create_lights(pos1, pos2)
    )
    return onlights.difference(turned_on_lights).union(turned_off_lights)
def wrapper(r):
    args = map(util.binstr2int, util.partition(r, self.args_bit_size))
    log(" > %s@%d | %s" % (name, regs.PC, ' '.join(map(str, args))))
    return fn(*args)
# paths to folders containing covid positive and covid negative patients
POSITIVE_CLASS_PATH = r'covid-positive'
NEGATIVE_CLASS_PATH = r'covid-negative'

start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless')
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), opt) for i in range(NUM_PEERS)]

positive_files = get_files(POSITIVE_CLASS_PATH, NUM_IMAGES, '.png')
negative_files = get_files(NEGATIVE_CLASS_PATH, NUM_IMAGES, '.png')

if DATA_SPLIT == 'partition':
    pos_partitions = partition(positive_files, NUM_PEERS)
    neg_partitions = partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'rpartition':
    pos_partitions = r_partition(positive_files, NUM_PEERS)
    neg_partitions = r_partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    pos_partitions = s_partition(positive_files, RATIOS)
    neg_partitions = s_partition(negative_files, RATIOS)

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    # Upload files on Task Training
    time.sleep(6)
    if DATA_SPLIT == 'iid':
# Defines the way to split the data. Could be 'iid' for iid data,
# 'partition' for even-size partitions, 'rpartition' for random-size
# partitions, or 'spartition' for partitions with the sizes passed as the
# argument RATIOS
DATA_SPLIT = 'rpartition'
RATIOS = [0.5, 0.3, 0.2]

start_time = time.time()

drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)
]
data = read_csv(CSV_FILE_PATH)
header = data[0]

if DATA_SPLIT == 'partition':
    res = partition(data, NUM_PEERS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")
elif DATA_SPLIT == 'rpartition':
    res = r_partition(data, NUM_PEERS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")
elif DATA_SPLIT == 'spartition':
    res = s_partition(data, RATIOS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
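# The browser-test scripts in this section switch between three split helpers.
# Minimal sketches consistent with the call sites (the real implementations
# live in the project's own utilities and may differ):
import random

def partition(data, n):
    """Even split: n contiguous chunks of (nearly) equal size."""
    size = len(data) // n
    return [data[i * size:(i + 1) * size] for i in range(n)]

def r_partition(data, n):
    """Random-size split: n chunks with randomly chosen boundaries."""
    cuts = sorted(random.sample(range(1, len(data)), n - 1))
    return [data[i:j] for i, j in zip([0] + cuts, cuts + [len(data)])]

def s_partition(data, ratios):
    """Ratio split: one chunk per entry of `ratios`, e.g. [0.5, 0.3, 0.2]."""
    out, start = [], 0
    for ratio in ratios:
        end = start + int(ratio * len(data))
        out.append(data[start:end])
        start = end
    return out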
def IOs(self):
    ios, _ = util.partition(lambda s: isinstance(s, CallBase), self._logs)
    return ios
# path to the folder containing CIFAR10 images, and to the CSV file of labels
IMAGE_FILE_PATH = r'CIFAR10'
LABEL_FILE_PATH = 'labels.csv'
NUM_IMAGES = 10

# Download and extract chromedriver from here:
# https://sites.google.com/a/chromium.org/chromedriver/downloads
op = webdriver.ChromeOptions()
op.add_argument('headless')
# You can add options=op for chrome headless mode
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), options=op) for i in range(NUM_PEERS)]
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]

start_time = time.time()

if DATA_SPLIT == 'partition':
    partitions = partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    partitions = s_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), RATIOS)
elif DATA_SPLIT == 'rpartition':
    partitions = r_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)

for index, driver in enumerate(drivers):
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    # Upload files on Task Training
    time.sleep(6)
    if DATA_SPLIT != 'iid':
        driver.find_element_by_id('hidden-input_cifar10_Images').send_keys(' \n '.join(partitions[index]))
        driver.find_element_by_id('hidden-input_cifar10_Labels').send_keys(os.path.abspath(LABEL_FILE_PATH))
def to_struct(cls):
    # make mappings from static fields to corresponding accessors
    def gen_s_flds_accessors(cls):
        s_flds = filter(op.attrgetter("is_static"), cls.flds)
        global _s_flds
        for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
            cname = fld.clazz.name
            fid = '.'.join([cname, fld.name])
            fname = unicode(repr(fld))
            logging.debug("{} => {}".format(fid, fname))
            _s_flds[fid] = fname

    cname = util.sanitize_ty(cls.name)
    global _ty
    # if this is an interface, merge this into another family of classes
    # as long as classes that implement this interface are in the same family
    if cls.is_itf:
        # interface may have static constants
        gen_s_flds_accessors(cls)
        subss = util.flatten_classes(cls.subs, "subs")
        bases = util.rm_dup(map(lambda sub: find_base(sub), subss))
        # filter out interfaces that extend other interfaces, e.g., Action
        base_clss, _ = util.partition(op.attrgetter("is_class"), bases)
        if not base_clss:
            logging.debug("no implementer of {}".format(cname))
        elif len(base_clss) > 1:
            logging.debug("ambiguous inheritance of {}: {}".format(cname, base_clss))
        else:  # len(base_clss) == 1
            base = base_clss[0]
            base_name = base.name
            logging.debug("{} => {}".format(cname, base_name))
            _ty[cname] = base_name
            if cls.is_inner:  # to handle inner interface w/ outer class name
                logging.debug("{} => {}".format(repr(cls), base_name))
                _ty[unicode(repr(cls))] = base_name
        return ''

    # if this is the base class having subclasses, make a virtual struct first
    if cls.subs and not cls.is_aux:
        cls = to_v_struct(cls)
        cname = cls.name

    # cls can be modified above, thus generate static fields accessors here
    gen_s_flds_accessors(cls)

    # for unique class numbering, add an identity mapping
    if cname not in _ty:
        _ty[cname] = cname

    buf = cStringIO.StringIO()
    buf.write("struct " + cname + " {\n  int hash;\n")
    # to avoid static fields, which will be bound to a class-representing package
    _, i_flds = util.partition(op.attrgetter("is_static"), cls.flds)
    buf.write('\n'.join(map(trans_fld, i_flds)))
    if len(i_flds) > 0:
        buf.write('\n')
    buf.write("}\n")
    return buf.getvalue()
def sample_sort(self, data, n_samples, comm):
    # n_samples is the number of samples to take; n_samples > comm.Get_size()
    # we keep m as comm.Get_size() - 1
    rank = comm.Get_rank()
    # n is the number of processes
    n = comm.Get_size()

    #### 1. Choose a random sample from the data array
    if (rank == 0):
        indices = [i for i in range(len(data))]
        random_indices = []
        for i in range(n_samples):
            r = random.randrange(len(indices))
            random_indices.append(indices.pop(r))
        sample = []
        for index in random_indices:
            sample.append(data[index])
    else:
        sample = None
    sample = comm.bcast(sample, root=0)

    #### 2. Distributively sort the sample using the bucket sort below
    sorted_sample = self.bucket_sort(sample, comm)

    #### 3. Choose n - 1 bucket separators
    if (rank == 0):
        for i in range(n_samples - n + 1):
            size = len(sorted_sample)
            del sorted_sample[random.randrange(size)]
        print('separators ' + str(sorted_sample))
    sorted_sample = comm.bcast(sorted_sample, root=0)

    #### 4. Distribute data in buckets
    if (rank == 0):
        data_partition = util.partition(data, n)
    else:
        data_partition = None
    data_part = comm.scatter(data_partition, root=0)
    print('data parts ' + str(data_part))

    ###### 4.1 each process assigns the corresponding bucket to each of its
    ###### elements
    assignation = []
    ## TODO remove elements from data_part
    for element in data_part:
        if (element > sorted_sample[-1]):
            assignation.append((element, n - 1))
        else:
            for i in range(len(sorted_sample)):
                separator = sorted_sample[i]
                if (element <= separator):
                    assignation.append((element, i))
                    break
    print('assignation rank:' + str(rank) + ' = ' + str(assignation))

    ###### 4.2 First node collects the assignations and sends elements to
    ###### their corresponding bucket
    assignations = comm.gather(assignation, root=0)
    if (rank == 0):
        assignations = np.concatenate(assignations).tolist()
        temp = [[] for i in range(n)]
        for element, bucket_id in assignations:
            temp[bucket_id].append(element)
        # send each non-root bucket exactly once
        for i in range(n - 1):
            comm.send(temp[i + 1], dest=(i + 1), tag=2)
        data = temp[0]
    else:
        data = []
        data = comm.recv(source=0, tag=2)
    print('bucket ' + str(rank) + ' -> ' + str(data))

    #### 5. Sort in each bucket and collect it
    data.sort()
    data = comm.gather(data, root=0)
    if (rank == 0):
        print('sorted data: ' + str(np.concatenate(data)))
        return np.concatenate(data).tolist()
start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless')
drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)
]

digit_files = [
    get_files(DIGIT_CLASS_PATHS[i], NUM_IMAGES, '.jpg')
    for i in range(len(DIGIT_CLASS_PATHS))
]

if DATA_SPLIT == 'partition':
    digit_partitions = [
        partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'rpartition':
    digit_partitions = [
        r_partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'spartition':
    digit_partitions = [
        s_partition(digit_files[i], RATIOS) for i in range(len(digit_files))
    ]

for index, driver in enumerate(drivers):
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    time.sleep(8)
    if DATA_SPLIT == 'iid':
        for i in range(len(DIGIT_CLASS_PATHS)):
savepath = '../parses/eps-40k-ml10-3trans/'
predictpath = 'prediction/eps-40k-ml10-3trans/second/minibatch=10/'

parses = [load_parses_separate(savepath, k) for k in range(28000)]

# Optional: training on parses with non-empty ref-forests.
cleaned_parses = [(target_forest, ref_forest, src_fsa, tgt_sent)
                  for (target_forest, ref_forest, src_fsa, tgt_sent) in parses
                  if ref_forest]
print(len(cleaned_parses))

lexicon = load_lexicon(savepath)
fset = load_featureset(savepath)

# initialize weights uniformly
w_init = defaultdict(float)
for feature in fset:
    w_init[feature] = 1e-2

k = 10
minibatches = partition(cleaned_parses, k)
w_trained, delta_ws, likelihoods = sgd_minibatches(
    iters=5, delta_0=10, w=w_init, minibatches=minibatches, batch_size=k,
    parses=cleaned_parses, shuffle=True, sparse=True, scale_weight=2,
    regularizer=1, lmbda=1, bar=True, log=False, log_last=False,
    check_convergence=False, savepath=False, prediction=predictpath,
    prediction_length=20)
print(likelihoods)

# printing for verification
w = w_trained[-1]
for k, v in sorted(w.items(), key=lambda x: x[1], reverse=True):
    print('{}'.format(k).ljust(25) + '{}'.format(v))
def evts(self):
    _, evts = util.partition(lambda s: isinstance(s, CallBase), self._logs)
    return evts
for x in input[1:]:
    if characters[-1] == '\\':
        characters[-1] = characters[-1] + x
    elif characters[-1] in ["\\'", '\\`']:
        characters[-1] = characters[-1] + x
    elif characters[-1].startswith(("\\'", '\\`')):
        if len(characters[-1]) == 2:
            characters[-1] = characters[-1] + x
        else:
            characters.append(x)
    elif characters[-1].startswith('\\'):
        if characters[-1].endswith(' '):
            characters.append(x)
        else:
            characters[-1] = characters[-1] + x
    elif x == 'v':
        characters[-1] = characters[-1] + x
    else:
        characters.append(x)

parts = partition(characters, [7, 33], cyclic=True, action='new')
result = []
for i, part in enumerate(parts):
    glued = ''.join(part)
    if i % 2 == 0:
        result.append(r'\textcolor{red}{%s}' % glued)
    else:
        result.append(r'\textcolor{white}{%s}' % glued)
file('1.txt', 'w').write(''.join(result))
def sgd_minibatches(iters, delta_0, w, minibatches=[], parses=[], batch_size=20,
                    sparse=False, log=False, bar=True, prob_log=False,
                    log_last=False, check_convergence=False, scale_weight=False,
                    regularizer=False, lmbda=2.0, savepath=False,
                    prediction=False, shuffle=False, prediction_length=10):
    """
    Performs stochastic gradient descent on the weights vector w on
    minibatches = [minibatch_1, minibatch_2, ..., minibatch_N].

    We are decaying the learning rate after each minibatch. We follow the
    following rule from section 5.2 of
    http://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf:

        delta_k = delta_0 * (1 + delta_0 * lmbda * k)**(-1)

    where k is the index of the minibatch, delta_0 is the initial learning
    rate, and lmbda is another hyperparameter that controls the rate of decay.
    """
    likelihoods = list()
    avg_likelihoods = list()
    ws = []
    delta_ws = []
    for i in range(iters):
        print('Iteration {0}/{1}'.format(i+1, iters))
        learning_rates = list()

        if bar and not (i == iters-1 and log_last):
            bar = progressbar.ProgressBar(max_value=len(minibatches))

        if shuffle:
            minibatches = partition(random.sample(parses, len(parses)), batch_size)

        for k, minibatch in enumerate(minibatches):
            delta_w = 0.0
            w_new = defaultdict(float)

            # delta_k = delta_0 when k=0 and i=0
            delta_k = delta_0 * (1 + delta_0 * (lmbda * (i * len(minibatches) + k)))**(-1)
            learning_rates.append(delta_k)

            if bar and not (i == iters-1 and log_last):
                bar.update(k)

            for m, parse in enumerate(minibatch):
                # unpack parse
                target_forest, ref_forest, src_fsa, tgt_sent = parse

                ### D_n(x) ###
                tgt_edge2fmap, _ = featurize_edges(target_forest, src_fsa,
                                                   tgt_sent=tgt_sent,
                                                   sparse_del=sparse,
                                                   sparse_ins=sparse,
                                                   sparse_trans=sparse)

                # recompute edge weights
                tgt_edge_weights = {edge: np.exp(weight_function(edge, tgt_edge2fmap[edge], w))
                                    for edge in target_forest}

                # compute inside and outside
                tgt_tsort = top_sort(target_forest)
                root_tgt = Nonterminal("D_n(x)")
                I_tgt = inside_algorithm(target_forest, tgt_tsort, tgt_edge_weights)
                O_tgt = outside_algorithm(target_forest, tgt_tsort, tgt_edge_weights,
                                          I_tgt, root_tgt)

                # compute expected features
                expected_features_Dn_x = expected_feature_vector(
                    target_forest, I_tgt, O_tgt, tgt_edge2fmap)

                ### D(x,y) ###
                ref_edge2fmap, _ = featurize_edges(ref_forest, src_fsa,
                                                   tgt_sent=tgt_sent,
                                                   sparse_del=sparse,
                                                   sparse_ins=sparse,
                                                   sparse_trans=sparse)

                # recompute edge weights
                ref_edge_weights = {edge: np.exp(weight_function(edge, ref_edge2fmap[edge], w))
                                    for edge in ref_forest}

                # compute inside and outside
                tsort = top_sort(ref_forest)
                root_ref = Nonterminal("D(x,y)")
                I_ref = inside_algorithm(ref_forest, tsort, ref_edge_weights)
                O_ref = outside_algorithm(ref_forest, tsort, ref_edge_weights,
                                          I_ref, root_ref)

                # compute expected features
                expected_features_D_xy = expected_feature_vector(
                    ref_forest, I_ref, O_ref, ref_edge2fmap)

                # update w
                w_step, d_w = update_w(w, expected_features_D_xy,
                                       expected_features_Dn_x,
                                       delta=delta_k, regularizer=regularizer)

                # store likelihoods
                if I_ref and I_tgt:  # for the case of an empty forest, since log(0) = -inf
                    # compute the likelihood of the target sentence
                    l = np.log(I_ref[root_ref]) - np.log(I_tgt[root_tgt])
                    if np.isfinite(l):
                        likelihoods.append(l)
                    else:
                        likelihoods.append(likelihoods[-1])
                else:
                    likelihoods.append(likelihoods[-1])
                avg_likelihood = sum(likelihoods) / len(likelihoods)
                avg_likelihoods.append(avg_likelihood)

                # the update is averaged over the minibatch
                delta_w += d_w / len(minibatch)
                for feature, value in w_step.items():
                    w_new[feature] += value / len(minibatch)

                if log or (i == iters-1 and log_last):
                    print("x = '{}'".format(src_fsa.sent))
                    print("y = '{}'".format(tgt_sent))

                    print('Viterbi')
                    d = viterbi(target_forest, tgt_tsort, tgt_edge_weights,
                                I_tgt, root_tgt)  # use exp!
                    candidates = write_derrivation(d)
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}'.format(
                        joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))

                    n = 100
                    d, count = ancestral_sample(n, target_forest, tgt_tsort,
                                                tgt_edge_weights, I_tgt, root_tgt)  # use exp!
                    candidates = write_derrivation(d)
                    print('Most sampled: {0}/{1}'.format(count, n))
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}\n'.format(
                        joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))

            if bar and not (i == iters-1 and log_last):
                bar.update(k+1)

            # hack: scale weights so that they are at most of the scale 10**scale_weight
            if scale_weight:
                abs_max = max(map(abs, w_new.values()))
                if np.isfinite(abs_max):
                    for k, v in w_new.items():
                        w_new[k] = v / 10**(int(np.log10(abs_max)) + 1 - scale_weight)
                    # update
                    w = w_new
                else:
                    # return to previous weight
                    print('inf or nan')
                    w = ws[-2]
                    print(tgt_sent)

            # update after each minibatch
            # w = w_new
            ws.append(w)
            delta_ws.append(delta_w)

        if bar and not (i == iters-1 and log_last):
            bar.finish()

        if savepath:
            save_weights(w, savepath + 'trained-{}-'.format(i+1))

        if check_convergence:
            print('delta w: {}\n'.format([ds / len(w.keys()) for ds in delta_ws]))
            print('Learning rates: {}'.format(learning_rates))

        # if prediction and i%5==0:  # save every 5 iterations
        predict(parses[0:prediction_length], w, i+1, prediction)

    return ws, delta_ws, avg_likelihoods
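# The decay rule in the docstring is easy to sanity-check in isolation; with
# delta_0 = 10 and lmbda = 1 (the values used in the training script above),
# the learning rate drops steeply over the first few minibatches:
delta_0, lmbda = 10.0, 1.0
rates = [delta_0 * (1 + delta_0 * lmbda * k) ** (-1) for k in range(5)]
# rates == [10.0, 0.909..., 0.476..., 0.322..., 0.243...]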
def scan_and_update_history(fs, fs_root, root_mark, path_filter, hash_type,
                            history_store, peerid, groupids, clock, slog):
    with slog.time("read history") as rt:
        history_entries = history_store.read_entries(peerid)
        rt.set_result({"history entries": len(history_entries)})

    with slog.time("scan files") as rt:
        file_stats = list(fs.list_stats(
            fs_root, root_mark, names_to_ignore=path_filter.names_to_ignore))
        rt.set_result({"file stats": len(file_stats)})

    with slog.time("diff file stats") as rt:
        fdiffs = diff_file_stats(file_stats, history_entries, groupids, slog)
        ignored_fdiffs, fdiffs = partition(
            fdiffs, lambda fdiff: path_filter.ignore_path(fdiff.rpath.full))
        slog.ignored_rpaths(fdiff.rpath for fdiff in ignored_fdiffs)
        rt.set_result({"file diffs": len(fdiffs)})

    with slog.time("hash files") as rt:
        hashed_fdiffs = list(hash_file_diffs(fs, fdiffs, hash_type, slog))
        rt.set_result({"hashed file diffs": len(hashed_fdiffs)})

    # We rescan the files to make sure they are stable. We might
    # decide to do this before hashing if there are lots of big
    # unstable files. But I think we'll usually be stable.
    with slog.time("rescan files") as rt:
        rescan_stats = list(fs.stats(
            (fdiff.rpath for fdiff in hashed_fdiffs)))
        rt.set_result({"rescanned file stats": len(rescan_stats)})

    with slog.time("check change stability") as rt:
        # compare against the *rescanned* stats, not the original scan
        rescan_stats_by_rpath = dict((rpath, (size, mtime))
                                     for rpath, size, mtime in rescan_stats)

        def is_stable(fdiff):
            (rescan_size, rescan_mtime) = rescan_stats_by_rpath.get(
                fdiff.rpath, (DELETED_SIZE, DELETED_MTIME))
            return fdiff.size == rescan_size and \
                   mtimes_eq(fdiff.mtime, rescan_mtime)

        stable_fdiffs, unstable_fdiffs = partition(hashed_fdiffs, is_stable)
        rt.set_result({"stable file diffs": len(stable_fdiffs),
                       "unstable file diffs": len(unstable_fdiffs)})

    with slog.time("insert new history entries"):
        new_entries = list(new_history_entries_from_file_diffs(
            stable_fdiffs, peerid, clock))
        if new_entries:
            history_store.add_entries(new_entries)

    # Technically, we don't have to do this, but it's nice to log this
    # after every scan.
    with slog.time("reread history") as rt:
        history_entries = history_store.read_entries(peerid)
        history_by_gpath = group_history_by_gpath(history_entries)
        total_size = sum(history.latest.size
                         for history in history_by_gpath.itervalues())
        rt.set_result({"path count": len(history_by_gpath),
                       "total size": total_size})

    return history_entries
def to_sk(cmd, smpls, tmpl, sk_dir):
    # clean up result directory
    if os.path.isdir(sk_dir):
        util.clean_dir(sk_dir)
    else:
        os.makedirs(sk_dir)

    # reset global variables so that we can run this encoding phase per demo
    reset()

    # update global constants
    def logged(mtd):
        if mtd.is_init:
            return False
        clss = util.flatten_classes([mtd.clazz], "subs")
        return sample.mtd_appears(smpls, clss, mtd.name)
    mtds = filter(logged, methods())
    if mtds:
        n_params = 2 + max(map(len, map(op.attrgetter("params"), mtds)))
    else:  # no meaningful logs in the sample?
        n_params = 2
    n_evts = sample.max_evts(smpls)
    if cmd == "android":
        n_views = sample.max_views(smpls)
        magic_S = max(3, n_evts + 1, n_views)
    else:
        magic_S = max(5, n_evts + 1)  # at least 5, just in case
    n_ios = sample.max_IOs(smpls)

    global _const
    _const = u"""
int P = {}; // length of parameters (0: (>|<)mid, 1: receiver, 2...)
int S = {}; // length of arrays for Java collections
int N = {}; // length of logs
""".format(n_params, magic_S, n_ios)

    # type.sk
    logging.info("building class hierarchy")
    tmpl.consist()
    # merge all classes and interfaces, except for primitive types
    clss, _ = util.partition(lambda c: util.is_class_name(c.name), classes())
    bases = rm_subs(clss)
    gen_type_sk(sk_dir, bases)

    # cls.sk
    cls_sks = []
    for cls in tmpl.classes:
        # skip the collections, which will be encoded at type.sk
        if repr(cls).split('_')[0] in C.collections:
            continue
        cls_sk = gen_cls_sk(sk_dir, smpls, cls)
        if cls_sk:
            cls_sks.append(cls_sk)

    # sample_x.sk
    smpl_sks = []
    for smpl in smpls:
        smpl_sk = "sample_" + smpl.name + ".sk"
        smpl_sks.append(smpl_sk)
        sk_path = os.path.join(sk_dir, smpl_sk)
        gen_smpl_sk(sk_path, smpl, tmpl, tmpl.harness(smpl.name))

    # log.sk
    gen_log_sk(sk_dir, tmpl)

    # sample.sk that imports all the other sketch files
    buf = cStringIO.StringIO()

    # deprecated as we use regex generator for class/method roles
    ## --bnd-cbits: the number of bits for integer holes
    #bits = max(5, int(math.ceil(math.log(len(methods()), 2))))
    #buf.write("pragma options \"--bnd-cbits {}\";\n".format(bits))

    # --bnd-unroll-amnt: the unroll amount for loops
    unroll_amnt = max(n_params, magic_S)
    buf.write("pragma options \"--bnd-unroll-amnt {}\";\n".format(unroll_amnt))

    # --bnd-inline-amnt: bounds inlining to n levels of recursion
    inline_amnt = None  # use a default value if not set
    if cmd == "android":
        #inline_amnt = 2  # depth of View hierarchy (at findViewByTraversal)
        inline_amnt = 1  # no recursion for flat Views
    elif cmd == "gui":
        # setting it 1 means there is no recursion
        inline_amnt = 1
    if inline_amnt:
        buf.write("pragma options \"--bnd-inline-amnt {}\";\n".format(inline_amnt))
    buf.write("pragma options \"--bnd-bound-mode CALLSITE\";\n")

    sks = ["log.sk", "type.sk"] + cls_sks + smpl_sks
    for sk in sks:
        buf.write("include \"{}\";\n".format(sk))
    with open(os.path.join(sk_dir, "sample.sk"), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    buf.close()
def gen_type_sk(sk_dir, bases):
    buf = cStringIO.StringIO()
    buf.write("package type;\n")
    buf.write(_const)
    buf.write(trans_lib())
    buf.write('\n')

    cols, decls = util.partition(lambda c: util.is_collection(c.name), bases)
    decls = filter(lambda c: not util.is_array(c.name), decls)
    itfs, clss = util.partition(op.attrgetter("is_itf"), decls)
    logging.debug("# interface(s): {}".format(len(itfs)))
    logging.debug("# class(es): {}".format(len(clss)))
    # convert interfaces first, then usual classes
    buf.write('\n'.join(util.ffilter(map(to_struct, itfs))))
    buf.write('\n'.join(util.ffilter(map(to_struct, clss))))

    # convert collections at last
    logging.debug("# collection(s): {}".format(len(cols)))
    buf.write('\n'.join(map(col_to_struct, cols)))

    # argument number of methods
    arg_num = map(lambda mtd: len(mtd.params), methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.argNum, ", ".join(map(str, arg_num))))

    # argument types of methods
    def get_args_typ(mtd):
        def get_arg_typ(param): return str(class_lookup(param[0]).id)
        return '{' + ", ".join(map(get_arg_typ, mtd.params)) + '}'
    args_typ = map(get_args_typ, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id, int idx) {{
  return _{0}[id][idx];
}}
""".format(C.typ.argType, ", ".join(args_typ)))

    # return type of methods
    def get_ret_typ(mtd):
        cls = class_lookup(mtd.typ)
        if cls: return cls.id
        else: return -1
    ret_typ = map(get_ret_typ, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.retType, ", ".join(map(str, ret_typ))))

    # belonging class of methods
    belongs = map(lambda mtd: mtd.clazz.id, methods())
    buf.write("""
#define _{0} {{ {1} }}
int {0}(int id) {{
  return _{0}[id];
}}
""".format(C.typ.belongsTo, ", ".join(map(str, belongs))))

    # sub type relations
    subcls = map(lambda cls_i:
        '{' + ", ".join(map(lambda cls_j: str(cls_i <= cls_j).lower(), classes())) + '}',
        classes())
    buf.write("""
#define _{0} {{ {1} }}
bit {0}(int i, int j) {{
  return _{0}[i][j];
}}
""".format(C.typ.subcls, ", ".join(subcls)))

    ## sub type relations
    #subcls = []
    #for cls_i in classes():
    #  row = []
    #  for cls_j in classes():
    #    row.append(int(cls_i <= cls_j))
    #  subcls.append(row)

    ## sub type relations in yale format
    #_, IA, JA = util.yale_format(subcls)
    #li, lj = len(IA), len(JA)
    #si = ", ".join(map(str, IA))
    #sj = ", ".join(map(str, JA))
    #buf.write("""
    ##define _iA {{ {si} }}
    ##define _jA {{ {sj} }}
    #int iA(int i) {{
    #  return _iA[i];
    #}}
    #int jA(int j) {{
    #  return _jA[j];
    #}}
    #bit subcls(int i, int j) {{
    #  int col_i = iA(i);
    #  int col_j = iA(i+1);
    #  for (int col = col_i; col < col_j; col++) {{
    #    if (j == jA(col)) return true;
    #  }}
    #  return false;
    #}}
    #""".format(**locals()))

    with open(os.path.join(sk_dir, "type.sk"), 'w') as f:
        f.write(buf.getvalue())
        logging.info("encoding " + f.name)
    buf.close()
print("Calculate average time resolutions...") avg_time_res = map(avg, min_max_t) total_avg_time_res = sum([t for userid, t in avg_time_res])/len(avg_time_res) print("avg time between positions: " + str(total_avg_time_res)) fig = plt.figure() plt.hist([v[1] for v in avg_time_res], bins = 50) plt.close() print("Reading file and counting cell occurencies...") cell_occ = util.MapReduce(user_positions, addByKey, initializer = init) occ = cell_occ(config.USERS) print("Creating cell occurrency graphs...") for userid, celldata in util.partition(map(group_user, occ)): cells = sorted(set([(p[0], p[1]) for p in celldata])) counts = [0] * len(cells) for lat, lon, count in celldata: counts[cells.index((lat,lon))] += count fig = plt.figure() plt.ylabel("# occurrences") plt.bar([c[0] for c in enumerate(counts)], counts, color = (0.0,0.0,0.8,1.0), linewidth= 0.0) plt.savefig("figs/counts/" + userid + ".png") plt.close() print("Creating timeline graphs...") init() for i, traj in enumerate(map(user_trajectory_positions, config.USERS)): userid = traj[0][0] posdata = [(p[1][0], p[1][1], p[1][2]) for p in traj]
def bucket_sort(self, data, comm):
    # data is the subarray assigned to this process
    # comm allows communication with the other processes
    rank = comm.Get_rank()
    # n is the number of processes
    n = comm.Get_size()

    #### 1. Find min and max and bucket length
    if (rank == 0):
        # find the min (a) and the max (b) number in the array
        max_num = data[0]
        min_num = data[0]
        for i in range(len(data)):
            if (data[i] < min_num):
                min_num = data[i]
            elif (data[i] > max_num):
                max_num = data[i]
        # broadcast the bucket division found
        bucket_len = (max_num - min_num) // n
        # the lower limit for each bucket i is a_i = a + i * bucket_length
        buckets_info = (min_num, bucket_len)
        data = util.partition(data, n)
    else:
        buckets_info = None
    buckets_info = comm.bcast(buckets_info, root=0)

    #### 2. Each process finds in which buckets its assigned elements must go
    # Now we can scatter the array and each process can decide the
    # corresponding bucket for each number that it has
    assigned_numbers = comm.scatter(data, root=0)
    # This array will have a tuple in each position, in the form (num, #bucket)
    assignation = []
    # The corresponding bucket number is (num - min) // bucket_length
    min_num = buckets_info[0]
    bucket_len = buckets_info[1]
    for num in assigned_numbers:
        assignation.append((num, ((num - min_num) // bucket_len)))
    data = comm.gather(assignation, root=0)

    #### 3. First node sends the corresponding elements for each bucket
    if (rank == 0):
        data = np.concatenate(data)
        temp = [[] for i in range(n)]
        for number, bucket_id in data:
            if (bucket_id == n):
                bucket_id = n - 1
            temp[bucket_id].append(number)
        for i in range(n - 1):
            comm.send(temp[i + 1], dest=(i + 1), tag=1)
        data = temp[0]
    else:
        data = []
        data = comm.recv(source=0, tag=1)

    #### 4. Each bucket sorts its elements
    data.sort()

    #### 5. First node collects the sorted elements, knowing that the next
    #### bucket always has greater numbers
    data = comm.gather(data, root=0)
    if (comm.rank == 0):
        return np.concatenate(data).tolist()
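# A minimal driver for the two mpi4py sorts above. `Sorter` is a made-up name
# for whatever class holds bucket_sort/sample_sort, and every rank is given
# the full array so the scatter calls have a valid send object. Run with e.g.:
#   mpiexec -n 4 python driver.py
from mpi4py import MPI
import random

comm = MPI.COMM_WORLD
data = None
if comm.Get_rank() == 0:
    data = [random.randrange(1000) for _ in range(64)]
data = comm.bcast(data, root=0)

sorter = Sorter()  # hypothetical container class for the methods above
result = sorter.bucket_sort(data, comm)
if comm.Get_rank() == 0:
    print(result)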