def expand(seed_set):
    members = seed_set
    print('seed:', members,
          nx.subgraph(data_graph,
                      set(flatten(map(lambda mem: nx.neighbors(data_graph, mem), members))) | members).edges())
    is_change = True
    while is_change:
        to_check_neighbors = list(flatten(map(lambda mem: nx.neighbors(data_graph, mem), members)))
        random.shuffle(to_check_neighbors)
        print(to_check_neighbors)
        is_change = False
        for neighbor in to_check_neighbors:
            if fitness(members | {neighbor}) > fitness(members):
                is_change = True
                members.add(neighbor)
                fitness(members, is_print=True)  # refreshes the w_in/w_all globals
                print('add neighbor:', neighbor, members, 'w_in:', w_in, 'w_all:', w_all)
                break
        for member in members:
            if fitness(members - {member}) > fitness(members):
                is_change = True
                members.remove(member)
                fitness(members, is_print=True)
                print('remove member:', member, members, 'w_in:', w_in, 'w_all:', w_all)
                break
    print(set(members))
    print('\n----------------------------\n')
def get_all_pitch_and_symbol(segments, raw=False):
    pitches = [[n['pitch'] for n in s['notes'] if n] for s in segments]
    pitches = list(iteration_utilities.flatten(pitches))
    symbols = [[c['symbol'] for c in s['chords'] if c] for s in segments]
    symbols = list(iteration_utilities.flatten(symbols))
    if not raw:
        pitches = [_normalize_pitch(p) for p in pitches]
        symbols = [_normalize_chord_symbol(s) for s in symbols]
    return pitches, symbols
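# A minimal usage sketch with hypothetical data: each segment is assumed to be
# a dict with 'notes' and 'chords' lists shaped as accessed above, and
# iteration_utilities is assumed to be imported at module level. raw=True
# avoids depending on the _normalize_* helpers defined elsewhere.
segments = [
    {'notes': [{'pitch': 60}, {'pitch': 64}], 'chords': [{'symbol': 'C'}]},
    {'notes': [{'pitch': 67}], 'chords': [{'symbol': 'G7'}]},
]
pitches, symbols = get_all_pitch_and_symbol(segments, raw=True)
# pitches == [60, 64, 67], symbols == ['C', 'G7']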
def list_read(self, path, is_flatten=False, is_return=True):
    """Read a txt file of python literals, one per line, and return a list.

    Returns [] if the file cannot be opened. If is_return is False, the
    parsed lines are appended to a sibling file (path + 'dd') instead.
    """
    try:
        file = open(path, 'r')
    except IOError:
        return []
    print('list read: ' + path + ' start!')
    file_lines = open(path + 'dd', 'a')
    lines = []
    for line in file:
        # Each line holds a python literal; flatten one level if requested.
        # flatten() is lazy, so materialize it with list().
        line = list(flatten(eval(line))) if is_flatten else eval(line)
        if is_return:
            lines.append(line)
        else:
            file_lines.write(str(line) + '\n')
    file_lines.close()
    file.close()
    print('list read: ' + path + ' done!')
    if is_return:
        return lines
def process(ip):
    def sort(part):
        *hyp, ext = part.split("]")
        return hyp, ext

    hyps, exts = zip(*map(sort, ip.split("[")))
    return flatten(hyps), exts
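# A minimal sketch of what process() returns (the input format resembles
# Advent of Code 2016 day 7, which is an assumption; flatten is assumed to be
# iteration_utilities.flatten, so the first element is a lazy iterator):
# hyps, exts = process("abba[mnop]qrst[xyxy]tuvw")
# list(hyps) == ['mnop', 'xyxy']     # bracketed (hypernet) sequences
# exts == ('abba', 'qrst', 'tuvw')   # parts outside the brackets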
def get_series(target_file, prog_file, out_file, chunk_size=32):
    to_read = get_batch(target_file, prog_file, chunk_size)
    with Pool(chunk_size) as p:
        res = p.map(get_pg, to_read)
    assert len(res) > 0
    res = list(flatten(res))
    write_res(out_file, res)
    write_prog(prog_file, to_read)
def pool_res(func, inp, inp_map, out_map, num_processes):
    with Pool(num_processes) as p:
        inp = [inp_map(x) for x in inp]
        # starmap when each mapped input is an argument list, plain map otherwise
        res = p.starmap(func, inp) if isinstance(inp[0], list) else p.map(func, inp)
    assert len(res) > 0
    res = list(map(out_map, res))
    return list(flatten(res))
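# A usage sketch under stated assumptions: Pool is multiprocessing.Pool and
# flatten is iteration_utilities.flatten. The worker must live at module level
# so Pool can pickle it, and the call belongs under a __main__ guard.
def _square(x):
    return [x, x * x]  # each result is a list, hence the final flatten

# if __name__ == '__main__':
#     pool_res(_square, [1, 2, 3], inp_map=int, out_map=list, num_processes=2)
#     # -> [1, 1, 2, 4, 3, 9]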
def update_cascades_setZeroNotPossiblestoOne(self):
    self.calc_posterior_link_probs()
    self.probs_links.clear()
    links_all = sorted(self.link_probablity, key=self.link_probablity.get, reverse=True)
    casc_links = defaultdict(lambda: [])
    mpt = defaultdict(lambda: [])
    test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
    start_t = t.time()
    for casc_id in self.cascades.keys():
        link_weights = defaultdict(lambda: [])
        possible_casc_links = [
            l for l in links_all
            if l[0] in self.nodes_of_cascade[casc_id] and l[1] in self.nodes_of_cascade[casc_id]
        ]
        for link in possible_casc_links:
            s, r = link
            dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
            if dt <= 0:
                link_weights[link] = 0
                test_not_p[casc_id][link] = 0
            if dt > 0:
                test_not_p[casc_id][link] = 1
                if link not in casc_links[casc_id]:
                    casc_links[casc_id].append(link)
                w = np.exp(-dt) + 1
                link_weights[link] = self.link_probablity[link] * w
        mpt[casc_id] = self.max_spanning_tree_of_each_cascade(casc_id, link_weights)
    not_possible_links_for_me = []
    for link in links_all:
        # if any cascade marks the link possible, it is possible everywhere
        for casc_id in self.cascades.keys():
            if link in test_not_p[casc_id]:
                if test_not_p[casc_id][link] == 1:
                    for other_casc_id in self.cascades.keys():
                        if link in test_not_p[other_casc_id] and test_not_p[other_casc_id][link] == 0:
                            test_not_p[other_casc_id][link] = 1
        for casc_id in self.cascades.keys():
            if link in test_not_p[casc_id] and test_not_p[casc_id][link] == 0:
                not_possible_links_for_me.append(link)
    for link in not_possible_links_for_me:
        self.probs_links[link] = 0
    for link, prob in self.link_probablity.items():
        if link not in self.probs_links.keys():
            self.probs_links[link] = prob
    return list(flatten(mpt.values()))
def stats_value(infos, field, note=True):
    if note:
        ret = [[n.get(field) for n in s._notes if not isinstance(n, str)] for s in infos]
    else:
        ret = [[c.get(field) for c in s._chords if not isinstance(c, str)] for s in infos]
    return list(iteration_utilities.flatten(ret))
def _booking_errors(ingest_info: ingest_info_pb2.IngestInfo) -> Dict[str, Set]:
    booking_ids = {booking.booking_id for booking in ingest_info.bookings}
    referenced_booking_ids = set(iteration_utilities.flatten(
        person.booking_ids for person in ingest_info.people))
    return {
        DUPLICATES: _get_duplicates(
            booking.booking_id for booking in ingest_info.bookings),
        NON_EXISTING_IDS: referenced_booking_ids - booking_ids,
        EXTRA_IDS: booking_ids - referenced_booking_ids,
    }
def _charge_errors(ingest_info: ingest_info_pb2.IngestInfo) -> Dict[str, Set]:
    charge_ids = {charge.charge_id for charge in ingest_info.charges}
    referenced_charge_ids = set(iteration_utilities.flatten(
        booking.charge_ids for booking in ingest_info.bookings))
    return {
        DUPLICATES: _get_duplicates(
            charge.charge_id for charge in ingest_info.charges),
        NON_EXISTING_IDS: referenced_charge_ids - charge_ids,
        EXTRA_IDS: charge_ids - referenced_charge_ids,
    }
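# The id-reconciliation pattern above, restated with plain sets (a hedged
# illustration only; ingest_info_pb2 and the DUPLICATES / NON_EXISTING_IDS /
# EXTRA_IDS keys are project-specific):
# charge_ids = {'c1', 'c2'};  referenced = {'c1', 'c3'}
# referenced - charge_ids == {'c3'}   # ids referenced but never defined
# charge_ids - referenced == {'c2'}   # ids defined but never referenced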
def update_cascades_countZero_One(self):
    self.calc_posterior_link_probs()
    self.probs_links.clear()
    links_all = sorted(self.link_probablity, key=self.link_probablity.get, reverse=True)
    mpt = defaultdict(lambda: [])
    test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
    time_of_start = t.time()
    for casc_id in self.cascades.keys():
        if casc_id == 0:
            continue
        possible_casc_links = [
            l for l in links_all
            if l[0] in self.nodes_of_cascade[casc_id] and l[1] in self.nodes_of_cascade[casc_id]
        ]
        link_weights = defaultdict(lambda: [])
        for link in possible_casc_links:
            s, r = link
            dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
            if dt <= 0:
                link_weights[link] = 0
                test_not_p[casc_id][link] = 0
            if dt > 0:
                test_not_p[casc_id][link] = 1
                w = np.exp(-dt) + 1
                link_weights[link] = self.link_probablity[link] * w
        mpt[casc_id] = self.max_spanning_tree_of_each_cascade(casc_id, link_weights)
    inferred_links = list(flatten(mpt.values()))
    not_possible_links_for_me = []
    for link in links_all:
        count_Zero = 0
        count_One = 0
        for i in range(self.num_of_cascades):
            if link in test_not_p[i]:
                if test_not_p[i][link] == 0:
                    count_Zero += 1
                else:
                    count_One += 1
        if count_Zero > count_One:
            not_possible_links_for_me.append(link)
    for link in not_possible_links_for_me:
        self.probs_links[link] = 0
    for link, prob in self.link_probablity.items():
        if link not in self.probs_links.keys():
            self.probs_links[link] = prob
    return inferred_links
def fitness(new_members, is_print=False):
    if len(new_members) == 1:
        return 0
    else:
        new_nodes = set(flatten(map(lambda mem: nx.neighbors(data_graph, mem), new_members))) | new_members
        global w_in
        global w_all
        w_all = len(nx.subgraph(data_graph, new_nodes).edges())
        w_in = len(nx.subgraph(data_graph, new_members).edges())
        if is_print:
            print('w_in', w_in, nx.subgraph(data_graph, new_members).edges())
            print('w_all', w_all, nx.subgraph(data_graph, new_nodes).edges())
        return float(w_in) / w_all
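# A minimal driver sketch tying expand() and fitness() together (assumptions:
# data_graph, w_in and w_all are module-level globals as used above, flatten
# is iteration_utilities.flatten, and nx.karate_club_graph() is just a
# convenient hypothetical test graph).
import random
import networkx as nx
from iteration_utilities import flatten

w_in, w_all = 0, 0
data_graph = nx.karate_club_graph()
expand({0})  # greedily grows a community around node 0, printing each step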
def load_from_json(self, path):
    print('json file load start!')
    contents = []
    titles = []
    file = open(path, 'r')
    for line in file:
        text = json.loads(line)
        content = list(flatten(text['content']))
        content = content[0:int(len(content) * 0.4)]  # keep the leading 40%
        contents.append(content)
        titles.append(text['title'])
    file.close()
    return contents, titles
def test_empty_input():
    empty = []
    assert list(iteration_utilities.combinations_from_relations({}, 1)) == []
    assert list(iteration_utilities.combinations_from_relations({'a': [1, 2, 3]}, 2)) == []
    assert iteration_utilities.consume(empty, 2) is None
    assert list(iteration_utilities.flatten(empty)) == []
    assert list(iteration_utilities.getitem(range(10), empty)) == []
    x, y = iteration_utilities.ipartition(empty, lambda x: x)
    assert list(x) == [] and list(y) == []
    # no need to test iter_subclasses here
    assert list(iteration_utilities.ncycles(empty, 10)) == []
    assert list(iteration_utilities.powerset(empty)) == [()]
    assert iteration_utilities.random_combination(empty, 0) == ()
    assert iteration_utilities.random_combination(empty, 0, True) == ()
    assert iteration_utilities.random_permutation(empty, 0) == ()
    assert list(iteration_utilities.remove(range(10), empty)) == list(range(10))
    assert list(iteration_utilities.replace(range(10), 20, empty)) == list(range(10))
    # no need to test repeatfunc here
    # no need to test tabulate here
    assert list(iteration_utilities.tail(empty, 2)) == []
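# A side note worth making explicit: iteration_utilities.flatten removes
# exactly one level of nesting (it is not recursive), which is why snippets in
# this file wrap it in list(...) to materialize the lazy iterator.
def test_flatten_is_single_level():
    assert list(iteration_utilities.flatten([[1, 2], [3]])) == [1, 2, 3]
    assert list(iteration_utilities.flatten([[1, [2]], [3]])) == [1, [2], 3]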
def content2vectors(self, path_train, is_return=False, is_saved=True):
    """Convert lists of words like [['i'], ['me']] to vectors.

    :param path_train: path to a file of word lists like [['i'], ['me']]
    :param is_return: return the vectors if True
    :param is_saved: save the vectors to disk with np.save if True
    :return: lists of vectors with the same shape as the input
    """
    try:
        file = open(path_train, 'r')
    except IOError:
        return []
    print('list read: ' + path_train + ' start!')
    vectors = []
    for line in file:
        # flatten() is lazy; materialize it before taking len() or slicing
        line = list(flatten(eval(line)))
        con_size = len(line)
        pre_size = int(con_size * 0.2)
        post_size = int(con_size * 0.1)
        if con_size < 4:
            content = line
        elif 4 <= con_size < 10:
            content = line[0:2] + line[con_size - 2:con_size - 1]
        else:
            content = line[0:pre_size] + line[con_size - 1 - post_size:con_size - 1]
        vector = self.wv[content]
        vectors.append(vector)
    file.close()
    if is_return:
        return vectors
    elif is_saved:
        np.save(path_train, vectors)
    print('list read: ' + path_train + ' done!')
def query(self, vector, radius=1, top_k=5):
    res_indices = []
    ## Need to improve index calculations
    indices = vector.dot(self.base_vector.T).reshape(self.num_tables, -1) > 0
    if radius == 0:
        res_indices = indices.dot(2**np.arange(self.n_vectors)) + \
            np.arange(self.num_tables) * 2**self.n_vectors
    elif radius == 1:
        # probe every bucket whose code differs from the query's by one bit
        clone_indices = indices.repeat(axis=0, repeats=self.n_vectors)
        rel_indices = (np.arange(self.num_tables) *
                       2**self.n_vectors).repeat(axis=0, repeats=self.n_vectors)
        translate = np.tile(np.eye(self.n_vectors), (self.num_tables, 1))
        res_indices = (np.abs(clone_indices - translate).dot(
            2**np.arange(self.n_vectors)) + rel_indices).astype(int)
        res_indices = np.concatenate([
            res_indices,
            indices.dot(2**np.arange(self.n_vectors)) +
            np.arange(self.num_tables) * 2**self.n_vectors
        ])
    start = time.time()
    lst = self.hash_table[res_indices].tolist()
    self.lookup_index_times.append(time.time() - start)
    start = time.time()
    # keep only candidates that appear in more than one probed bucket
    res = list(unique_everseen(duplicates(flatten(lst))))
    sim_scores = vector.dot(self.vectors[res].T)
    max_sim_indices = sim_scores.argsort()[-top_k:][::-1]
    max_sim_scores = sim_scores[max_sim_indices]
    return [(self.names[res[i]], score)
            for i, score in zip(max_sim_indices, max_sim_scores)]
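# Design note on the candidate filter above: hashing into num_tables tables
# and keeping duplicates(...) means a point is only scored if it collides with
# the query in at least two probed buckets, trading some recall for a cheaper
# exact-scoring step. radius=1 additionally probes every bucket that differs
# from the query's code by one bit, which is the usual multi-probe LSH trick.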
def instest():
    test = []
    testlim = []
    form = LimitsForm()
    data = []
    out_of_limit = []
    if form.validate_on_submit():
        testlim = InsTestRes.query.with_entities(
            InsTestRes.sensor_nb, InsTestRes.cap_min, InsTestRes.cap_max,
            InsTestRes.cutoff_min, InsTestRes.cutoff_max, InsTestRes.leakage,
            InsTestRes.noise) \
            .filter(cast(InsTestRes.updated, Date) == form.date_.data).all()

        def base_query():
            return InsTest.query.with_entities(
                InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                .filter(cast(InsTest.updated, Date) == form.date_.data)

        # sensor types 2..5 map to testlim rows 0..3
        for i, type_ in enumerate(range(2, 6)):  # capacitance limits
            out_of_limit.append(base_query()
                .filter((InsTest.cap < testlim[i][1]) | (InsTest.cap > testlim[i][2]))
                .filter(InsTest.type == type_).all())
        for i, type_ in enumerate(range(2, 6)):  # cutoff limits
            out_of_limit.append(base_query()
                .filter((InsTest.cutoff < testlim[i][3]) | (InsTest.cutoff > testlim[i][4]))
                .filter(InsTest.type == type_).all())
        for i, type_ in enumerate(range(2, 6)):  # noise limit
            out_of_limit.append(base_query()
                .filter(InsTest.noise > testlim[i][6])
                .filter(InsTest.type == type_).all())
        for i, type_ in enumerate(range(2, 6)):  # leakage limit
            out_of_limit.append(base_query()
                .filter(InsTest.leakage < testlim[i][5])
                .filter(InsTest.type == type_).all())

        out_list = []
        out_trace_str = []
        out_of_limit = list(flatten(out_of_limit))
        for r in out_of_limit:
            out_list.append(r[0])
            out_trace_str.append([r[1], r[2]])
        test = InsTest.query.with_entities(
            InsTest.streamer, InsTest.trace, InsTest.ass_sn) \
            .filter(cast(InsTest.updated, Date) == form.date_.data).all()
        all_in_col = []
        streamer = 1
        sn = None
        trace = []
        streamers = []
        data = []
        temp = []
        for i in test:
            if i[0] == streamer and sn != i[2]:
                all_in_col.append([i[0], i[1], i[2]])
                sn = i[2]
            else:
                if i[0] != streamer:
                    all_in_col.append([i[0], i[1], i[2]])
                    sn = i[2]
                    streamer = streamer + 1
        for pos in all_in_col:
            if pos[1] not in trace:
                trace.append(pos[1])
            if pos[0] not in streamers:
                streamers.append(pos[0])
        i = 1
        for tr in trace:
            temp.append(i)
            temp.append(str(tr) + '>>' + str(tr + 11))
            for info in all_in_col:
                if info[1] == tr:
                    temp.append(info[2])
            data.append(temp)
            temp = []
            i = i + 1
        return render_template('instest.html', form=form, test=data,
                               testlim=testlim, cap_out2=out_list,
                               out_of_limit=out_of_limit)
    return render_template('instest.html', form=form, test=data, testlim=testlim)
def vectorize(examples, word_dict, entity_dict, max_s_len, max_s_numb,
              sort_by_len=True, verbose=True):
    """Vectorize `examples`.

    in_x1, in_x2: sequences for document and question respectively.
    in_y: label
    in_l: whether the entity label occurs in the document.
    """
    in_x1 = []
    in_x2 = []
    in_l = np.zeros((len(examples[0]), len(entity_dict)))
    in_y = []
    for idx, (d, q, a) in enumerate(zip(examples[0], examples[1], examples[2])):
        d_sents = d.split(' . ')
        for i, s in enumerate(d_sents):
            d_sents[i] = s.split(' ')
        q_words = q.split(' ')
        assert a in flatten(d_sents)
        for i, s in enumerate(d_sents):
            ls = max(0, max_s_len - len(s))
            d_sents[i] = [word_dict[w] if w in word_dict else 0 for w in s] + [0] * ls
            d_sents[i] = d_sents[i][:max_s_len]
        # pad to memory_size
        lm = max(0, max_s_numb - len(d_sents))
        for _ in range(lm):
            d_sents.append([0] * max_s_len)
        d_sents = d_sents[:max_s_numb]
        ls = max(0, max_s_len - len(q_words))
        q_words = [word_dict[w] if w in word_dict else 0 for w in q_words] + [0] * ls
        q_words = q_words[:max_s_len]
        if len(d_sents) > 0 and len(q_words) > 0:
            in_x1.append(d_sents)
            in_x2.append(q_words)
            in_l[idx, [entity_dict[w] for w in flatten(d_sents) if w in entity_dict]] = 1.0
            in_y.append(entity_dict[a] if a in entity_dict else 0)
        if verbose and (idx % 100000 == 0):
            logging.info('Vectorization: processed %d / %d' % (idx, len(examples[0])))
    # sort_by_len is currently unused; the original sorting code is disabled:
    # def len_argsort(seq):
    #     return sorted(range(len(flatten(seq))), key=lambda x: len(flatten(seq)[x]))
    #
    # if sort_by_len:
    #     # sort by the document length
    #     sorted_index = len_argsort(in_x1)
    #     in_x1 = [in_x1[i] for i in sorted_index]
    #     in_x2 = [in_x2[i] for i in sorted_index]
    #     in_l = in_l[sorted_index]
    #     in_y = [in_y[i] for i in sorted_index]
    return np.array(in_x1), np.expand_dims(np.array(in_x2), axis=1), in_l, np.array(in_y)
def update_cascades_consider_as_trees_as_toInfer(self, toInfer):
    self.calc_posterior_link_probs()
    self.probs_links.clear()
    links_all = sorted(self.link_probablity, key=self.link_probablity.get, reverse=True)
    mpt = defaultdict(lambda: [])
    test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
    start_t = t.time()
    link_weights = [defaultdict(lambda: []) for _ in range(self.num_of_cascades)]
    for casc_id in self.cascades.keys():
        possible_casc_links = [
            l for l in links_all
            if l[0] in self.nodes_of_cascade[casc_id] and l[1] in self.nodes_of_cascade[casc_id]
        ]
        for link in possible_casc_links:
            s, r = link
            dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
            if dt <= 0:
                link_weights[casc_id][link] = 0
                test_not_p[casc_id][link] = 0
            if dt > 0:
                test_not_p[casc_id][link] = 1
                w = np.exp(-dt) + 1
                link_weights[casc_id][link] = self.link_probablity[link] * w
        mpt[casc_id] = self.max_spanning_tree_of_each_cascade(casc_id, link_weights[casc_id])
    inferred_links = set()
    for link in flatten(mpt.values()):
        if (link[1], link[0]) not in inferred_links:
            inferred_links.add(link)
    # keep adding the highest-weight unused link per cascade until toInfer
    # links are collected; the count_casc guard prevents an endless loop when
    # no cascade can contribute any more links
    count_casc = 0
    while len(inferred_links) < toInfer and count_casc < len(self.cascades):
        for casc_id in self.cascades.keys():
            new_links, highest_link = self.max_spanning_tree_of_each_cascade_2(
                link_weights[casc_id], list(mpt[casc_id]))
            if len(new_links) > 0:
                mpt[casc_id].append(highest_link)
                if (highest_link[1], highest_link[0]) not in inferred_links:
                    inferred_links.add(highest_link)
            count_casc += 1
            if len(inferred_links) >= toInfer or count_casc >= len(self.cascades):
                break
    not_possible_links_for_me = []
    for link in links_all:
        count_Zero = 0
        count_One = 0
        for i in range(self.num_of_cascades):
            if link in test_not_p[i]:
                if test_not_p[i][link] == 0:
                    count_Zero += 1
                else:
                    count_One += 1
        if count_Zero > count_One:
            not_possible_links_for_me.append(link)
    for link in not_possible_links_for_me:
        self.probs_links[link] = 0
    for link, prob in self.link_probablity.items():
        if link not in self.probs_links.keys():
            self.probs_links[link] = prob
    return list(flatten(mpt.values()))
def ssl(hyps, exts):
    inside = set(flatten(map(abas, hyps)))
    return not inside.isdisjoint(starmap(rev, flatten(map(abas, exts))))
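# A sketch of the helpers ssl() relies on (assumptions, based on the Advent of
# Code 2016 day 7 "SSL" rule that an aba outside brackets needs a matching bab
# inside): abas yields an (a, b) pair for every aba substring, and rev swaps a
# pair so an outside aba can be compared against an inside bab.
def abas(s):
    return [(s[i], s[i + 1]) for i in range(len(s) - 2)
            if s[i] == s[i + 2] and s[i] != s[i + 1]]

def rev(a, b):
    return (b, a)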
def getSimilarBills(es_similarity: List[dict]) -> dict:
    """Get a dict of similar bills and matching sections.

    Remove items from the 'similar_sec' object which refer to the same bill
    section. Retain only the highest scoring match.

    Args:
        es_similarity (List[dict]): the es_similarity object generated by
            getSimilarSections

    Returns:
        similarBills (dict): a dict of the form:
            {
                '116hr1500': [
                    {
                        'section_num': 4,
                        'section_header': 'Definitions',
                        'score': 48.76,
                        'sectionIndex': [index of section from original bill]
                    },
                    ...
                ]
            }
    """
    similarBills = {}
    sectionSimilars = [item.get('similar_sections', []) for item in es_similarity]
    billnumbers = list(
        unique_everseen(
            flatten([[similarItem.get('billnumber') for similarItem in similars]
                     for similars in sectionSimilars])))
    for billnumber in billnumbers:
        try:
            similarBills[billnumber] = []
            for sectionIndex, similarItem in enumerate(sectionSimilars):
                sectionBillItems = sorted(
                    filter(lambda x: x.get('billnumber', '') == billnumber, similarItem),
                    key=lambda k: k.get('score', 0),
                    reverse=True)
                if sectionBillItems and len(sectionBillItems) > 0:
                    for sectionBillItem in sectionBillItems:
                        # Check if we've seen this billItem before and which has a higher score
                        currentScore = sectionBillItem.get('score', 0)
                        currentSection = sectionBillItem.get('section_num', '') + \
                            sectionBillItem.get('section_header', '')
                        dupeIndexes = [
                            similarBillIndex
                            for similarBillIndex, similarBill in enumerate(
                                similarBills.get(billnumber, []))
                            if (similarBill.get('section_num', '') +
                                similarBill.get('section_header', '')) == currentSection
                        ]
                        if not dupeIndexes:
                            sectionBillItem['sectionIndex'] = str(sectionIndex)
                            sectionBillItem['target_section_number'] = \
                                es_similarity[sectionIndex].get('section_number', '')
                            sectionBillItem['target_section_header'] = \
                                es_similarity[sectionIndex].get('section_header', '')
                            similarBills[billnumber].append(sectionBillItem)
                            break
                        elif currentScore > similarBills[billnumber][dupeIndexes[0]].get('score', 0):
                            del similarBills[billnumber][dupeIndexes[0]]
                            similarBills[billnumber].append(sectionBillItem)
        except Exception as err:
            print(err)
    return similarBills
def remove_minimum(input_list):
    input_list = list(flatten(input_list))
    input_list.remove(min(input_list))
    return input_list
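# A quick usage check (assuming flatten is iteration_utilities.flatten, which
# strips exactly one level of nesting before the minimum is removed):
from iteration_utilities import flatten
assert remove_minimum([[3, 1], [2]]) == [3, 2]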
def check(bots, _):
    nums = list(flatten([bots[f'output {i}'] for i in range(3)]))
    return reduce(lambda a, b: a * b, nums) if len(nums) == 3 else None
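# A minimal driver sketch for check() (the 'output N' keys suggest Advent of
# Code 2016 day 10 part 2, where the answer is the product of the chips in
# outputs 0-2; that reading is an assumption). flatten and reduce are assumed
# to come from iteration_utilities and functools respectively.
from functools import reduce
from iteration_utilities import flatten

bots = {'output 0': [2], 'output 1': [3], 'output 2': [5]}
assert check(bots, None) == 30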