Example No. 1
    def expand(seed_set):
        members = seed_set
        print('seed:', members, nx.subgraph(data_graph, set(
            flatten(map(lambda mem: nx.neighbors(data_graph, mem), members))) | members).edges())
        is_change = True
        while is_change:
            to_check_neighbors = list(flatten(map(lambda mem: nx.neighbors(data_graph, mem), members)))
            random.shuffle(to_check_neighbors)
            print(to_check_neighbors)
            is_change = False
            # for neighbor in to_check_neighbors:
            for neighbor in to_check_neighbors:
                if fitness(members | {neighbor}) > fitness(members):
                    is_change = True
                    members.add(neighbor)
                    fitness(members, is_print=True)
                    print('add neighbor:', neighbor, members, 'w_in:', w_in, 'w_all:', w_all)
                    break

            for member in members:
                if fitness(members - {member}) > fitness(members):
                    is_change = True
                    members.remove(member)
                    fitness(members, is_print=True)
                    print('remove member:', member, members, 'w_in:', w_in, 'w_all:', w_all)
                    break
        print(set(members))
        print('\n----------------------------\n')
Example No. 2
    def expand(seed_set):
        members = seed_set
        print('seed:', members, nx.subgraph(
            data_graph,
            set(
                flatten(map(lambda mem: nx.neighbors(data_graph, mem),
                            members))) | members).edges())
        is_change = True
        while is_change:
            to_check_neighbors = list(
                flatten(map(lambda mem: nx.neighbors(data_graph, mem),
                            members)))
            random.shuffle(to_check_neighbors)
            print(to_check_neighbors)
            is_change = False
            # for neighbor in to_check_neighbors:
            for neighbor in to_check_neighbors:
                if fitness(members | {neighbor}) > fitness(members):
                    is_change = True
                    members.add(neighbor)
                    fitness(members, is_print=True)
                    print('add neighbor:', neighbor, members, 'w_in:', w_in, 'w_all:', w_all)
                    break

            for member in members:
                if fitness(members - {member}) > fitness(members):
                    is_change = True
                    members.remove(member)
                    fitness(members, is_print=True)
                    print('remove member:', member, members, 'w_in:', w_in, 'w_all:', w_all)
                    break
        print(set(members))
        print('\n----------------------------\n')
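Both expand() variants above rely on a module-level data_graph, the fitness() helper shown later (Examples No. 13 and 15), and an external flatten utility applied to the per-member neighbor lists. As a minimal sketch only, assuming the project does not simply import flatten from a library such as iteration_utilities, a compatible one-level flatten could be:

def flatten(nested):
    # One-level flatten: yield every item of every inner iterable.
    for inner in nested:
        for item in inner:
            yield item

The call sites in these snippets wrap the result in list() or set() before reuse, so a generator is sufficient.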
Example No. 3
def get_all_pitch_and_symbol(segements, raw=False):
    pitches = [[n['pitch'] for n in s['notes'] if n] for s in segements]
    pitches = list(iteration_utilities.flatten(pitches))

    symbols = [[c['symbol'] for c in s['chords'] if c] for s in segements]
    symbols = list(iteration_utilities.flatten(symbols))

    if not raw:
        pitches = [_normalize_pitch(p) for p in pitches]
        symbols = [_normalize_chord_symbol(s) for s in symbols]

    return pitches, symbols
Example No. 4
 def list_read(self, path, is_flatten=False, is_return=True):
     # Try to read a txt file and return a list. Return [] if the file
     # cannot be opened.
     try:
         file = open(path, 'r')
     except IOError:
         error = []
         return error
     print('list read: ' + path + ' start!')
     file_lines = open(path + 'dd', 'a')
     lines = []
     for line in file:
         if is_flatten:
             line = flatten(eval(line))
         else:
             line = eval(line)
         if is_return:
             lines.append(line)
         else:
             file_lines.write(str(line) + '\n')
     file_lines.close()
     file.close()
     print('list read: ' + path + ' done!')
     if is_return:
         return lines
Example No. 5
def process(ip):
    def sort(part):
        *hyp, ext = part.split("]")
        return hyp, ext

    hyps, exts = zip(*map(sort, ip.split("[")))

    return flatten(hyps), exts
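process() splits an IPv7-style address into its bracketed and unbracketed pieces: after splitting on '[', each chunk is split on ']', which puts the bracketed part (if any) first. A small usage sketch, assuming flatten is a one-level flatten such as iteration_utilities.flatten:

hyps, exts = process("abba[mnop]qrst[xyyx]tyui")
# list(hyps) -> ['mnop', 'xyyx']           (the bracketed sequences)
# exts       -> ('abba', 'qrst', 'tyui')   (the remaining sequences)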
Example No. 6
def get_series(target_file, prog_file, out_file, chunk_size=32):
    links = []
    to_read = get_batch(target_file, prog_file, chunk_size)
    with Pool(chunk_size) as p:
        res = p.map(get_pg, to_read)
        assert len(res) > 0
        res = list(flatten(res))
    write_res(out_file, res)
    write_prog(prog_file, to_read)
Example No. 7
def pool_res(func, inp, inp_map, out_map, num_processes):

    with Pool(num_processes) as p:
        inp = [inp_map(x) for x in inp]
        res = p.starmap(func, inp) if isinstance(inp[0], list) else p.map(
            func, inp)
        assert len(res) > 0
        res = list(map(out_map, res))
    return list(flatten(res))
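pool_res() maps each input through inp_map, fans the calls out over a multiprocessing Pool (starmap when an input item is a list of positional arguments, plain map otherwise), applies out_map to every result, and flattens the results into one list. A minimal usage sketch; square and the argument values below are illustrative only, and flatten is assumed to be a one-level flatten:

def square(x):
    # Module-level so the Pool workers can pickle it.
    return [x, x * x]

if __name__ == '__main__':
    # inp_map=int parses the raw inputs; out_map=sorted keeps each
    # per-item result ordered before the final flatten.
    print(pool_res(square, ['2', '3'], int, sorted, 2))
    # -> [2, 4, 3, 9]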
Example No. 8
    def update_cascades_setZeroNotPossiblestoOne(self):
        self.calc_posterior_link_probs()
        self.probs_links.clear()

        links_all = sorted(self.link_probablity,
                           key=self.link_probablity.get,
                           reverse=True)
        casc_links = defaultdict(lambda: [])
        mpt = defaultdict(lambda: [])
        test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
        start_t = t.time()

        for casc_id in self.cascades.keys():
            link_weights = defaultdict(lambda: [])
            possible_casc_links = [
                l for l in links_all if l[0] in self.nodes_of_cascade[casc_id]
                and l[1] in self.nodes_of_cascade[casc_id]
            ]
            for link in possible_casc_links:
                s = link[0]
                r = link[1]
                dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
                if dt <= 0:
                    link_weights[link] = 0
                    test_not_p[casc_id][link] = 0
                if dt > 0:
                    test_not_p[casc_id][link] = 1
                    if link not in casc_links[casc_id]:
                        casc_links[casc_id].append(link)
                        w = np.exp(-dt) + 1
                        link_weights[link] = self.link_probablity[link] * w
            mpt[casc_id] = self.max_spanning_tree_of_each_cascade(
                casc_id, link_weights)

        not_possible_links_for_me = []
        for link in links_all:
            for casc_id in self.cascades.keys():
                if link in test_not_p[casc_id]:
                    if test_not_p[casc_id][link] == 1:
                        for other_casc_id in self.cascades.keys():
                            if link in test_not_p[other_casc_id] and test_not_p[
                                    other_casc_id][link] == 0:
                                test_not_p[other_casc_id][link] = 1

            for casc_id in self.cascades.keys():
                if link in test_not_p[casc_id] and test_not_p[casc_id][
                        link] == 0:
                    not_possible_links_for_me.append(link)

        for link in not_possible_links_for_me:
            self.probs_links[link] = 0

        for link, prob in self.link_probablity.items():
            if link not in self.probs_links.keys():
                self.probs_links[link] = prob
        return list(flatten((mpt.values())))
Example No. 9
def stats_value(infos, filed, note=True):
    if note:
        ret = [[n.get(filed) for n in s._notes if not isinstance(n, str)]
               for s in infos]
    else:
        ret = [[c.get(filed) for c in s._chords if not isinstance(c, str)]
               for s in infos]

    flatten = list(iteration_utilities.flatten(ret))
    return flatten
Example No. 10
def _booking_errors(ingest_info: ingest_info_pb2.IngestInfo) -> Dict[str, Set]:
    booking_ids = {booking.booking_id for booking in ingest_info.bookings}
    referenced_booking_ids = set(iteration_utilities.flatten(
        person.booking_ids for person in ingest_info.people))

    return {
        DUPLICATES: _get_duplicates(
            booking.booking_id for booking in ingest_info.bookings),
        NON_EXISTING_IDS: referenced_booking_ids - booking_ids,
        EXTRA_IDS: booking_ids - referenced_booking_ids
    }
Example No. 11
def _charge_errors(ingest_info: ingest_info_pb2.IngestInfo) -> Dict[str, Set]:
    charge_ids = {charge.charge_id for charge in ingest_info.charges}
    referenced_charge_ids = set(iteration_utilities.flatten(
        booking.charge_ids for booking in ingest_info.bookings))

    return {
        DUPLICATES: _get_duplicates(
            charge.charge_id for charge in ingest_info.charges),
        NON_EXISTING_IDS: referenced_charge_ids - charge_ids,
        EXTRA_IDS: charge_ids - referenced_charge_ids
    }
Example No. 12
    def update_cascades_countZero_One(self):
        self.calc_posterior_link_probs()
        self.probs_links.clear()

        links_all = sorted(self.link_probablity,
                           key=self.link_probablity.get,
                           reverse=True)
        mpt = defaultdict(lambda: [])
        test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
        time_of_start = t.time()

        for casc_id in self.cascades.keys():
            if casc_id == 0: continue
            possible_casc_links = [
                l for l in links_all if l[0] in self.nodes_of_cascade[casc_id]
                and l[1] in self.nodes_of_cascade[casc_id]
            ]
            link_weights = defaultdict(lambda: [])
            for link in possible_casc_links:
                s = link[0]
                r = link[1]
                dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
                if dt <= 0:
                    link_weights[link] = 0
                    test_not_p[casc_id][link] = 0
                if dt > 0:
                    test_not_p[casc_id][link] = 1
                    w = np.exp(-dt) + 1
                    link_weights[link] = self.link_probablity[link] * w
            mpt[casc_id] = self.max_spanning_tree_of_each_cascade(
                casc_id, link_weights)

        inferred_links = list(flatten((mpt.values())))

        not_possible_links_for_me = []
        for link in links_all:
            count_Zero = 0
            count_One = 0
            for i in range(self.num_of_cascades):
                if link in test_not_p[i]:
                    if test_not_p[i][link] == 0:
                        count_Zero += 1
                    else:
                        count_One += 1
            if count_Zero > count_One:
                not_possible_links_for_me.append(link)

        for link in not_possible_links_for_me:
            self.probs_links[link] = 0

        for link, prob in self.link_probablity.items():
            if link not in self.probs_links.keys():
                self.probs_links[link] = prob
        return inferred_links
Example No. 13
 def fitness(new_members, is_print=False):
     if len(new_members) == 1:
         return 0
     else:
         new_nodes = set(flatten(map(lambda mem: nx.neighbors(data_graph, mem), new_members))) | new_members
         global w_in
         global w_all
         w_all = len(nx.subgraph(data_graph, new_nodes).edges())
         w_in = len(nx.subgraph(data_graph, new_members).edges())
         if is_print:
             print('w_in', w_in, nx.subgraph(data_graph, new_members).edges())
             print('w_all', w_all, nx.subgraph(data_graph, new_nodes).edges())
         return float(w_in) / w_all
Example No. 14
 def load_from_json(self, path):
     print('json file load start!')
     contents = []
     titles = []
     file = open(path, 'r')
     for line in file:
         text = json.loads(line)
         content = text['content']
         content = list(flatten(content))
         content = content[0:int(len(content) * 0.4)]
         contents.append(content)
         title = text['title']
         titles.append(title)
     file.close()
     return contents, titles
Example No. 15
 def fitness(new_members, is_print=False):
     if len(new_members) == 1:
         return 0
     else:
         new_nodes = set(
             flatten(
                 map(lambda mem: nx.neighbors(data_graph, mem),
                     new_members))) | new_members
         global w_in
         global w_all
         w_all = len(nx.subgraph(data_graph, new_nodes).edges())
         w_in = len(nx.subgraph(data_graph, new_members).edges())
         if is_print:
              print('w_in', w_in, nx.subgraph(data_graph,
                                              new_members).edges())
              print('w_all', w_all, nx.subgraph(data_graph,
                                                new_nodes).edges())
         return float(w_in) / w_all
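fitness() scores a candidate community as w_in / w_all: the number of edges among the members themselves divided by the number of edges in the subgraph spanned by the members plus all of their neighbors. A small worked example, assuming data_graph is the module-level graph these snippets read and flatten behaves as sketched after Example No. 2:

import networkx as nx

data_graph = nx.path_graph(4)   # edges: (0, 1), (1, 2), (2, 3)
print(fitness({1, 2}))
# w_in  = 1 (the edge (1, 2))
# w_all = 3 (all edges among {0, 1, 2, 3}, i.e. the members plus their neighbors)
# prints 0.3333...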
Example No. 16
def test_empty_input():
    empty = []

    assert list(iteration_utilities.combinations_from_relations({}, 1)) == []

    assert list(
        iteration_utilities.combinations_from_relations({'a': [1, 2, 3]},
                                                        2)) == []

    assert iteration_utilities.consume(empty, 2) is None

    assert list(iteration_utilities.flatten(empty)) == []

    assert list(iteration_utilities.getitem(range(10), empty)) == []

    x, y = iteration_utilities.ipartition(empty, lambda x: x)
    assert list(x) == [] and list(y) == []

    # no need to test iter_subclasses here

    assert list(iteration_utilities.ncycles(empty, 10)) == []

    assert list(iteration_utilities.powerset(empty)) == [()]

    assert iteration_utilities.random_combination(empty, 0) == ()
    assert iteration_utilities.random_combination(empty, 0, True) == ()

    assert iteration_utilities.random_permutation(empty, 0) == ()

    assert list(iteration_utilities.remove(range(10),
                                           empty)) == list(range(10))

    assert list(iteration_utilities.replace(range(10), 20,
                                            empty)) == list(range(10))

    # no need to test repeatfunc here

    # no need to test tabulate here

    assert list(iteration_utilities.tail(empty, 2)) == []
Example No. 17
 def content2vectors(self, path_train, is_return=False, is_saved=True):
     """can convert your lists of word like [['i'],['me']] to vectors
         :param path_train: lists of word like [['i'],['me']]
         :isReturn:
         :isSaved:
         :return: lists of vetors with the same shape of path_train
     """
     try:
         file = open(path_train, 'r')
     except IOError:
         error = []
         return error
      print('list read: ' + path_train + ' start!')
     vectors = []
     for line in file:
         line = eval(line)
         line = flatten(line)
         con_size = len(line)
         #            print(con_size)
         pre_size = int(len(line) * 0.2)
         post_size = int(len(line) * 0.1)
         #    print(pre_size,post_size)
         content = []
         if con_size < 4:
             content = line
         elif 4 <= con_size < 10:
             content = line[0:2] + line[con_size - 1 - 1:con_size - 1]
         else:
             content = line[0:pre_size] + line[con_size - 1 - post_size:con_size - 1]
         #            print(con_size)
         vector = self.wv[content]
         vectors.append(vector)
     if is_return:
         return vectors
     elif is_saved:
         np.save(path_train, vectors)
      print('list read: ' + path_train + ' done!')
Example No. 18
    def query(self, vector, radius=1, top_k=5):
        res_indices = []
        ## Need to improve index calculations
        indices = vector.dot(self.base_vector.T).reshape(self.num_tables,
                                                         -1) > 0
        if radius == 0:
            res_indices = indices.dot(2**np.arange(
                self.n_vectors)) + np.arange(
                    self.num_tables) * 2**self.n_vectors
        elif radius == 1:
            clone_indices = indices.repeat(axis=0, repeats=self.n_vectors)
            rel_indices = (np.arange(self.num_tables) *
                           2**self.n_vectors).repeat(axis=0,
                                                     repeats=self.n_vectors)
            translate = np.tile(np.eye(self.n_vectors), (self.num_tables, 1))
            res_indices = (np.abs(clone_indices - translate).dot(2**np.arange(
                self.n_vectors)) + rel_indices).astype(int)
            res_indices = np.concatenate([
                res_indices,
                indices.dot(2**np.arange(self.n_vectors)) +
                np.arange(self.num_tables) * 2**self.n_vectors
            ])

        start = time.time()
        lst = self.hash_table[res_indices].tolist()
        self.lookup_index_times.append(time.time() - start)
        start = time.time()

        res = list(unique_everseen(duplicates(flatten(lst))))
        sim_scores = vector.dot(self.vectors[res].T)

        max_sim_indices = sim_scores.argsort()[-top_k:][::-1]
        max_sim_scores = sim_scores[max_sim_indices]

        return [(self.names[res[i]], score)
                for i, score in zip(max_sim_indices, max_sim_scores)]
Example No. 19
def instest():
    test = []
    testlim = []
    form = LimitsForm()
    data = []
    out_of_limit = []

    if form.validate_on_submit():
        testlim = InsTestRes.query.with_entities(InsTestRes.sensor_nb, 
                                                 InsTestRes.cap_min, InsTestRes.cap_max,
                                                 InsTestRes.cutoff_min, InsTestRes.cutoff_max,
                                                 InsTestRes.leakage, InsTestRes.noise) \
                                  .filter(cast(InsTestRes.updated, Date) == form.date_.data).all()

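        # cap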
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cap < testlim[0][1]) | (InsTest.cap > testlim[0][2])) \
                            .filter(InsTest.type == 2).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cap < testlim[1][1]) | (InsTest.cap > testlim[1][2])) \
                            .filter(InsTest.type == 3).all())

        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cap < testlim[2][1]) | (InsTest.cap > testlim[2][2])) \
                            .filter(InsTest.type == 4).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cap < testlim[3][1]) | (InsTest.cap > testlim[3][2])) \
                            .filter(InsTest.type == 5).all())

        # cutoff
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cutoff < testlim[0][3]) | (InsTest.cutoff > testlim[0][4])) \
                            .filter(InsTest.type == 2).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cutoff < testlim[1][3]) | (InsTest.cutoff > testlim[1][4])) \
                            .filter(InsTest.type == 3).all())

        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cutoff < testlim[2][3]) | (InsTest.cutoff > testlim[2][4])) \
                            .filter(InsTest.type == 4).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter((InsTest.cutoff < testlim[3][3]) | (InsTest.cutoff > testlim[3][4])) \
                            .filter(InsTest.type == 5).all())

        # noise
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter(InsTest.noise > testlim[0][6]) \
                            .filter(InsTest.type == 2).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter(InsTest.noise > testlim[1][6]) \
                            .filter(InsTest.type == 3).all())

        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter(InsTest.noise > testlim[2][6]) \
                            .filter(InsTest.type == 4).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data) \
                            .filter(InsTest.noise > testlim[3][6]) \
                            .filter(InsTest.type == 5).all())

        # leakage
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer)
                            .filter(cast(InsTest.updated, Date) == form.date_.data)
                            .filter(InsTest.leakage < testlim[0][5])
                            .filter(InsTest.type == 2).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer)
                            .filter(cast(InsTest.updated, Date) == form.date_.data)
                            .filter(InsTest.leakage < testlim[1][5])
                            .filter(InsTest.type == 3).all())

        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer)
                            .filter(cast(InsTest.updated, Date) == form.date_.data)
                            .filter(InsTest.leakage < testlim[2][5])
                            .filter(InsTest.type == 4).all())
                    
        out_of_limit.append(InsTest.query.with_entities(InsTest.ass_sn, InsTest.trace, InsTest.streamer)
                            .filter(cast(InsTest.updated, Date) == form.date_.data)
                            .filter(InsTest.leakage < testlim[3][5])
                            .filter(InsTest.type == 5).all())

        out_list = []
        out_trace_str = []
        out_of_limit = list(flatten(out_of_limit))
        for r in out_of_limit:
            out_list.append(r[0])
            out_trace_str.append([r[1], r[2]])

        test = InsTest.query.with_entities(InsTest.streamer, InsTest.trace, InsTest.ass_sn) \
                            .filter(cast(InsTest.updated, Date) == form.date_.data).all()

        all_in_col = []
        streamer = 1
        sn = None
        trace = []
        i = 1
        streamers = []
        data = []
        temp = []

        for i in test:
            if i[0] == streamer and sn != i[2]:
                all_in_col.append([i[0], i[1], i[2]])
                sn = i[2]
            else:
                if i[0] != streamer:
                    all_in_col.append([i[0], i[1], i[2]])
                    sn = i[2]
                    streamer = streamer + 1

        for pos in all_in_col:
            if pos[1] not in trace:
                trace.append(pos[1])
            if pos[0] not in streamers:
                streamers.append(pos[0])
        i = 1    
        for tr in trace:
            temp.append(i)
            temp.append(str(tr) + '>>' + str(tr + 11))
            for info in all_in_col:
                if info[1] == tr:
                    temp.append(info[2])
            data.append(temp)
            temp = []
            i = i + 1
        
        return render_template('instest.html', form=form, test=data, testlim=testlim, cap_out2=out_list, out_of_limit=out_of_limit)

    return render_template('instest.html', form=form, test=data, testlim=testlim)
Example No. 20
def vectorize(examples,
              word_dict,
              entity_dict,
              max_s_len,
              max_s_numb,
              sort_by_len=True,
              verbose=True):
    """
        Vectorize `examples`.
        in_x1, in_x2: sequences for document and question respectively.
        in_y: label
        in_l: whether the entity label occurs in the document.
    """
    in_x1 = []
    in_x2 = []
    in_l = np.zeros((len(examples[0]), len(entity_dict)))
    in_y = []

    # stat_len =[]
    # stat_wordxsent = []
    for idx, (d, q, a) in enumerate(zip(examples[0], examples[1],
                                        examples[2])):
        d_sents = d.split(' . ')
        for i, s in enumerate(d_sents):
            d_sents[i] = s.split(' ')
        # stat_len.append(len(d_sents))
        # stat_wordxsent.append(max([len(s)for s in d_sents]))
        # d_words = d.split(' ')
        q_words = q.split(' ')
        assert (a in flatten(d_sents))

        for i, s in enumerate(d_sents):
            ls = max(0, max_s_len - len(s))
            d_sents[i] = [word_dict[w] if w in word_dict else 0
                          for w in s] + [0] * ls
            d_sents[i] = d_sents[i][:max_s_len]

        # pad to memory_size
        lm = max(0, max_s_numb - len(d_sents))
        for _ in range(lm):
            d_sents.append([0] * max_s_len)
        d_sents = d_sents[:max_s_numb]
        # seq1 = [word_dict[w] if w in word_dict else 0 for w in d_words]
        # seq2 = [word_dict[w] if w in word_dict else 0 for w in q_words]

        ls = max(0, max_s_len - len(q_words))
        q_words = [word_dict[w] if w in word_dict else 0
                   for w in q_words] + [0] * ls
        q_words = q_words[:max_s_len]

        if (len(d_sents) > 0) and (len(q_words) > 0):
            in_x1.append(d_sents)
            in_x2.append(q_words)
            in_l[
                idx,
                [entity_dict[w] for w in flatten(d_sents)
                 if w in entity_dict]] = 1.0
            in_y.append(entity_dict[a] if a in entity_dict else 0)

        if verbose and (idx % 100000 == 0):
            logging.info('Vectorization: processed %d / %d' %
                         (idx, len(examples[0])))
    # logging.info('Max sent:{}\t Avg sent: {} Std sent:{}'.format(max(stat_len),sum(stat_len)/len(stat_len),np.std(stat_len)))
    # logging.info('Max wxse:{}\t Avg wxse: {} Std wxse:{}'.format(max(stat_wordxsent),sum(stat_wordxsent)/len(stat_wordxsent),np.std(stat_wordxsent)))

    # def len_argsort(seq):
    #     return sorted(range(len(flatten(seq))), key=lambda x: len(flatten(seq)[x]))
    #
    # if sort_by_len:
    #     # sort by the document length
    #     sorted_index = len_argsort(in_x1)
    #     in_x1 = [in_x1[i] for i in sorted_index]
    #     in_x2 = [in_x2[i] for i in sorted_index]
    #     in_l = in_l[sorted_index]
    #     in_y = [in_y[i] for i in sorted_index]

    return np.array(in_x1), np.expand_dims(np.array(in_x2),
                                           axis=1), in_l, np.array(in_y)
Example No. 21
    def update_cascades_consider_as_trees_as_toInfer(self, toInfer):
        self.calc_posterior_link_probs()
        self.probs_links.clear()
        links_all = sorted(self.link_probablity,
                           key=self.link_probablity.get,
                           reverse=True)
        mpt = defaultdict(lambda: [])
        test_not_p = [defaultdict() for _ in range(self.num_of_cascades)]
        start_t = t.time()
        link_weights = [
            defaultdict(lambda: []) for _ in range(self.num_of_cascades)
        ]

        for casc_id in self.cascades.keys():
            possible_casc_links = [
                l for l in links_all if l[0] in self.nodes_of_cascade[casc_id]
                and l[1] in self.nodes_of_cascade[casc_id]
            ]
            for link in possible_casc_links:
                s = link[0]
                r = link[1]
                dt = self.hit_time[r][casc_id] - self.hit_time[s][casc_id]
                if dt <= 0:
                    link_weights[casc_id][link] = 0
                    test_not_p[casc_id][link] = 0
                if dt > 0:
                    test_not_p[casc_id][link] = 1
                    w = np.exp(-dt) + 1
                    link_weights[casc_id][
                        link] = self.link_probablity[link] * w
            mpt[casc_id] = self.max_spanning_tree_of_each_cascade(
                casc_id, link_weights[casc_id])

        inferred_links = set()
        for link in flatten((mpt.values())):
            if (link[1], link[0]) not in inferred_links:
                inferred_links.add(link)
        count_casc = 0
        while len(inferred_links) < toInfer:
            for casc_id in self.cascades.keys():
                new_links, highest_link = self.max_spanning_tree_of_each_cascade_2(
                    link_weights[casc_id], list(mpt[casc_id]))
                if len(new_links) > 0:
                    mpt[casc_id].append(highest_link)
                    if (highest_link[1],
                            highest_link[0]) not in inferred_links:
                        inferred_links.add(highest_link)
                count_casc += 1
            if len(inferred_links) >= toInfer or count_casc >= len(
                    self.cascades):
                break
        not_possible_links_for_me = []
        for link in links_all:
            count_Zero = 0
            count_One = 0
            for i in range(self.num_of_cascades):
                if link in test_not_p[i]:
                    if test_not_p[i][link] == 0:
                        count_Zero += 1
                    else:
                        count_One += 1
            if count_Zero > count_One:
                not_possible_links_for_me.append(link)

        for link in not_possible_links_for_me:
            self.probs_links[link] = 0

        for link, prob in self.link_probablity.items():
            if link not in self.probs_links.keys():
                self.probs_links[link] = prob
        return list(flatten((mpt.values())))
Example No. 22
 def ssl(hyps, exts):
     inside = set(flatten(map(abas, hyps)))
     return not inside.isdisjoint(starmap(rev, flatten(map(abas, exts))))
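ssl() reports whether any 'aba' pattern found in hyps has its reversed 'bab' counterpart among the patterns in exts. The abas and rev helpers are not shown in this example; a sketch consistent with how they are called here (an assumption, not the original helpers) could be:

def abas(s):
    # All (outer, inner) pairs for 'aba'-shaped windows in s.
    return [(a, b) for a, b, c in zip(s, s[1:], s[2:]) if a == c and a != b]

def rev(a, b):
    # The 'bab' pair corresponding to an 'aba' pair.
    return (b, a)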
Example No. 23
def getSimilarBills(es_similarity: List[dict]) -> dict:
    """
  Get a dict of similar bills and matching sections
  Remove items from the 'similar_sec' object which refer to the same bill section.
  Retain only the highest scoring match.

  Args:
      es_similarity (list[dict]): the es_similarity object generated by getSimilarSections 

  Returns:
      similarBills (dict): a dict of the form:
    {
      116hr1500: [
        {section_num: 4, section_header: 'Definitions', score: 48.76, 
        sectionIndex: [index of section from original bill] }, ...
      ]
    }
  """
    similarBills = {}
    sectionSimilars = [
        item.get('similar_sections', []) for item in es_similarity
    ]
    billnumbers = list(
        unique_everseen(
            flatten(
                [[similarItem.get('billnumber') for similarItem in similars]
                 for similars in sectionSimilars])))
    for billnumber in billnumbers:
        try:
            similarBills[billnumber] = []
            for sectionIndex, similarItem in enumerate(sectionSimilars):
                sectionBillItems = sorted(filter(
                    lambda x: x.get('billnumber', '') == billnumber,
                    similarItem),
                                          key=lambda k: k.get('score', 0),
                                          reverse=True)
                if sectionBillItems and len(sectionBillItems) > 0:
                    for sectionBillItem in sectionBillItems:
                        # Check if we've seen this billItem before and which has a higher score
                        currentScore = sectionBillItem.get('score', 0)
                        currentSection = sectionBillItem.get(
                            'section_num', '') + sectionBillItem.get(
                                'section_header', '')
                        dupeIndexes = [
                            similarBillIndex
                            for similarBillIndex, similarBill in enumerate(
                                similarBills.get(billnumber, []))
                            if (similarBill.get('section_num', '') +
                                similarBill.get('section_header', '')
                                ) == currentSection
                        ]
                        if not dupeIndexes:
                            sectionBillItem['sectionIndex'] = str(sectionIndex)
                            sectionBillItem[
                                'target_section_number'] = es_similarity[
                                    sectionIndex].get('section_number', '')
                            sectionBillItem[
                                'target_section_header'] = es_similarity[
                                    sectionIndex].get('section_header', '')
                            similarBills[billnumber].append(sectionBillItem)
                            break
                        elif currentScore > similarBills[billnumber][
                                dupeIndexes[0]].get('score', 0):
                            del similarBills[billnumber][dupeIndexes[0]]
                            similarBills[billnumber].append(sectionBillItem)
        except Exception as err:
            print(err)

    return similarBills
Example No. 24
def remove_minimum(input_list):
    input_list = list(flatten(input_list))
    input_list.remove(min(input_list))
    return input_list
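A quick usage sketch, assuming flatten is a one-level flatten such as iteration_utilities.flatten:

print(remove_minimum([[4, 1], [7, 3]]))
# flattens to [4, 1, 7, 3], removes the minimum -> [4, 7, 3]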
Example No. 25
 def check(bots, _):
     nums = list(flatten([bots[f'output {i}'] for i in range(3)]))
     return reduce(lambda a, b: a * b, nums) if len(nums) == 3 else None