Example #1
def connected_components_diff(game, player):
    """
    Difference between number of connected components
    of one player and its opponent

    :param game:
    :param player:
    :return:
    """
    size = game.width * game.height
    uf = UnionFind(size)
    blank = game.get_blank_spaces()
    for bs in blank:
        for n in neighbors(game, bs):
            uf.union(bs, n)
    player_location = game.get_player_location(player)
    opp_location = game.get_player_location(game.get_opponent(player))
    for n in neighbors(game, player_location):
        uf.union(n, player_location)
    for n in neighbors(game, opp_location):
        uf.union(n, opp_location)

    # component() returns the members of the set containing the given item
    # (as in Example #6); its length is the reachable area for each player.
    pl_score = float(len(uf.component(player_location)))
    op_score = float(len(uf.component(opp_location)))
    return pl_score - op_score
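
These nine snippets come from different projects, so the UnionFind they rely on is not a single class: some construct it from an element count (Example #1), some from a list of items (Examples #2, #3, #7, #8), and some start empty and call add (Examples #4, #5, #9). They do share a small surface: add, union, connected, components() for all disjoint sets, and component(x) for the set containing x. The sketch below is a minimal dict-based stand-in for that shared interface so the examples can be run in isolation; it is not the library any of them actually imported.

class UnionFind:
    """Minimal disjoint-set stand-in matching the API used in these examples."""

    def __init__(self, items=None):
        # Accept a count, an iterable of items, or nothing.
        if isinstance(items, int):
            items = range(items)
        self.parent = {}
        for item in items or []:
            self.add(item)

    def add(self, item):
        # Each new item starts in its own singleton set.
        self.parent.setdefault(item, item)

    def find(self, item):
        # Path-compressing find; unseen items are added lazily.
        self.add(item)
        root = item
        while self.parent[root] != root:
            root = self.parent[root]
        while self.parent[item] != root:
            self.parent[item], item = root, self.parent[item]
        return root

    def union(self, a, b):
        self.parent[self.find(a)] = self.find(b)

    def connected(self, a, b):
        return self.find(a) == self.find(b)

    def component(self, item):
        # All members of the set containing the given item.
        root = self.find(item)
        return {x for x in self.parent if self.find(x) == root}

    def components(self):
        # List of all disjoint sets.
        groups = {}
        for x in self.parent:
            groups.setdefault(self.find(x), set()).add(x)
        return list(groups.values())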
Example #2
def UnionFindCommunity(self, G):
    # Treat every connected component of G as one community: union the
    # endpoints of each edge, then collect the resulting disjoint sets.
    nodes = list(G.nodes())
    uf = UnionFind(nodes)
    for source, target in G.edges():
        uf.union(source, target)
    score = list(uf.components())
    self.addGNodesAttr(G, score, "Union find")
Example #3
import time

import pandas as pd


def trip_roster_merged(trip_roster_file, colname_file, trip_chain,
                       park_pair_file, gas_pair_file):
    col_names = pd.read_csv(colname_file)
    trip_roster = pd.read_csv(trip_roster_file,
                              header=None,
                              names=col_names.columns)
    if not trip_chain:
        matched_trip_pair = pd.read_csv(park_pair_file)
        matched_trip_pair.columns = [
            'TripId', 'StopId', 'EndTripId', 'StartTripId'
        ]
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        matched_trip_pair = pd.concat([
            matched_trip_pair[['EndTripId', 'StartTripId', 'StopId']],
            pd.read_csv(gas_pair_file)
        ])
        trip_pair_id = ['EndTripId', 'StopId', 'StartTripId']
        new_pair_df = matched_trip_pair
    else:
        matched_trip_pair = pd.read_csv(park_pair_file,
                                        usecols=['end', 'start'])
        matched_trip_pair.columns = ['EndTripId', 'StartTripId']
        matched_trip_pair = pd.concat([
            matched_trip_pair,
            pd.read_csv(gas_pair_file, usecols=['EndTripId', 'StartTripId'])
        ])
        trip_pair_id = ['EndTripId', 'StartTripId']
        # trip chaining
        start_time = time.time()
        uf = UnionFind(list(set(matched_trip_pair.values.flatten())))
        for index, row in matched_trip_pair.iterrows():
            uf.union(row['EndTripId'], row['StartTripId'])
        result = uf.components()
        print('Trip chaining takes %s secs for %s trip pairs.' %
              (time.time() - start_time, len(matched_trip_pair)))

        def set_first_last(input_set):
            # Components are sets, so ordering is arbitrary: this simply
            # picks two representative trip ids for each chain.
            tmp = list(input_set)
            return [tmp[0], tmp[-1]]

        new_pair = list(map(set_first_last, result))
        new_pair_df = pd.DataFrame(new_pair,
                                   columns=['EndTripId', 'StartTripId'])
    trip_unmatched = trip_roster.loc[~trip_roster['TripId'].isin(
        matched_trip_pair[['EndTripId', 'StartTripId']].values.flatten())]
    # create od file for matched trips/ trip chain
    trip_od = new_pair_df.merge(
        trip_roster[['TripId', 'StartLocLat',
                     'StartLocLon']].rename(columns={'TripId': 'EndTripId'}),
        how='left',
        sort=False)
    trip_od = trip_od.merge(
        trip_roster[['TripId', 'EndLocLat',
                     'EndLocLon']].rename(columns={'TripId': 'StartTripId'}),
        how='left',
        sort=False)
    trip_od['TripId'] = trip_od[trip_pair_id].apply(
        lambda row: '_'.join(str(v) for v in row), axis=1)
    return trip_unmatched, trip_od
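
For intuition, this is how the chaining branch behaves on a toy input (using the stand-in UnionFind from Example #1; the column names are the ones the function expects):

import pandas as pd

# Trips A->B and B->C chain into a single component {A, B, C}.
pairs = pd.DataFrame({'EndTripId': ['A', 'B'], 'StartTripId': ['B', 'C']})
uf = UnionFind(list(set(pairs.values.flatten())))
for _, row in pairs.iterrows():
    uf.union(row['EndTripId'], row['StartTripId'])
print(uf.components())  # [{'A', 'B', 'C'}]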
Example #4
def cluster(edges, start_at):
    # Kruskal-style clustering over nodes 1..500: process edges in
    # ascending weight order, merging components until only four remain.
    # start_at is unused in this snippet.
    uf = UnionFind()

    sortede = sorted(edges, key=lambda x: x[3])

    for k in range(1, 501):
        uf.add(k)

    for _, u, v, w in sortede:
        if len(list(uf.components())) == 4:
            break
        if not uf.connected(u, v):
            uf.union(u, v)

    find_minimal(uf, edges)
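
find_minimal is not shown in this example. The loop above is the classic max-spacing k-clustering setup (stop Kruskal's merging at k = 4 clusters), and under that reading the missing helper scans the remaining edges for the smallest weight that still crosses two different clusters. A sketch under that assumption; the name and signature are taken from the call above, the body is a guess:

def find_minimal(uf, edges):
    # Smallest edge weight whose endpoints lie in different clusters:
    # the spacing of the 4-clustering built above.
    spacing = min(w for _, u, v, w in edges if not uf.connected(u, v))
    print(spacing)
    return spacing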
Example #5
def cluster(nodes):
    # Cluster binary labels by Hamming distance: any two labels at
    # distance 1 or 2 end up in the same component.
    uf = UnionFind()
    one_diff, two_diff = one_two_away()

    node_set = set(nodes)  # O(1) membership tests in the loop below
    for v in nodes:
        uf.add(v)

    for v in nodes:
        # XOR v with every mask of Hamming weight 1 or 2 and merge any
        # resulting label that is actually present in the input.
        od = [(v ^ i) for i in one_diff]
        td = [(v ^ i) for i in two_diff]
        for d in od + td:
            if d in node_set and not uf.connected(v, d):
                uf.union(v, d)
                print("smashed", v, d)

    print(len(list(uf.components())))
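
one_two_away() is also not shown; from the XOR usage above it must return the bit masks with exactly one and exactly two bits set. A sketch, assuming 24-bit labels (the bit width is an assumption, not given in the snippet):

from itertools import combinations

def one_two_away(bits=24):
    # Masks of Hamming weight 1 and 2; XOR-ing a label with one of these
    # yields every label at Hamming distance 1 or 2 from it.
    one_diff = [1 << i for i in range(bits)]
    two_diff = [(1 << i) | (1 << j) for i, j in combinations(range(bits), 2)]
    return one_diff, two_diff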
Example #6
def sample_config(config,
                  eta,
                  N,
                  no_colors,
                  sites,
                  param_name,
                  curr_params,
                  uf=None,
                  cluster_constraints=None):

    if uf is None and not only_averages:
        '''Generate clusters from the assigned bonds (eta_edge)'''
        uf = UnionFind(sites)
        eta_edges = eta[1]
        for i in range(eta_edges.shape[0]):
            for j in range(eta_edges.shape[1]):
                for e in range(eta_edges.shape[2]):
                    if eta_edges[i, j, e] == -1: continue
                    if eta_edges[i, j, e] == 0:
                        if e == 0:
                            uf.union(site2str((i, j)), site2str((i + 1, j)))
                        elif e == 1:
                            uf.union(site2str((i, j)), site2str((i, j + 1)))
        '''For each cluster, find the site with strongest constraint (smallest eta_site)
           and assign that eta_site to the entire cluster
        '''
        eta_sites = eta[2]
        cluster_constraints = {}
        cl_n = 0
        cls = np.zeros((eta_sites.shape[0], eta_sites.shape[1]),
                       dtype=np.uint16)  # up to 255x255 box
        for cluster in uf.components():
            cl_n += 1
            min_constraint = no_colors
            cluster_root = '-1,-1'
            for site_str in cluster:
                site = str2site(site_str)
                if eta_sites[site[0], site[1]] <= min_constraint:
                    min_constraint = eta_sites[site[0], site[1]]
                    cluster_root = site_str
                cls[site[0], site[1]] = cl_n
            cluster_constraints[cluster_root] = min_constraint
            cluster_constraints[cl_n] = min_constraint
        if prt: print('clusters formed by bonds (eta_edge):')
        if prt: print(cls)

    # Case with no field and gamma > 0
    if (curr_params['alpha'] == 0 and curr_params['gamma'] > 0):
        if prt: print('Case with no field and gamma > 0')
        '''Choose exactly how many colors to use in the configuration'''
        max_colors = eta[0]  # eta_lambda
        prob_k = []
        if only_averages:
            no_cl = N * N
        else:
            no_cl = len(uf.components())
        for k in range(1, max_colors + 1):
            prob_k.append(P_exact_cols(no_colors, k, no_cl, S))
        if prt: print('prob_k:', prob_k)
        prob_k = [pk / sum(prob_k) for pk in prob_k]
        prob_k = np.array(prob_k)
        if prt: print('prob_k:', prob_k)
        exact_k = np.random.choice((np.arange(1, max_colors + 1)), p=prob_k)
        if prt: print('exact number of colors to use in configuration:')
        if prt: print(exact_k)

        if only_averages:
            '''Only compute the average in the case with zero field and no
               interaction but gamma > 0; use an arbitrary partition
               (avoids computing Bell polynomials)
            '''
            chosen_colors = range(1, exact_k + 1)
            if exact_k == no_colors:
                chosen_partition = [exact_k]
            else:
                chosen_partition = ([exact_k - 1]
                                    + [0] * max(0, no_colors - exact_k - 1)
                                    + [1])
            part = []
            for i in range(len(chosen_partition)):
                part += ([i + 1] * chosen_partition[i])
            chosen_partition = part
            if prt: print('chosen partition:')
            if prt: print(chosen_partition)
            color_arr = []
            for i in range(len(chosen_partition)):
                color_arr += [chosen_colors[i]] * chosen_partition[i]
            config = np.array(color_arr).reshape((N, N))
            return config, None, None
        '''Sample a partition of the no. of clusters into k blocks using Bell polynomials'''
        partition_dict = {}
        if ((no_cl, exact_k) in bell_dict):
            partition_dict = bell_dict[(no_cl, exact_k)]
            if prt: print("Bell found", (no_cl, exact_k))
        else:
            session.evaluate("subs = Array[x," + str(no_cl - exact_k + 1) +
                             "]")
            partition_dict = session.evaluate(
                "Association@CoefficientRules[BellY[" + str(no_cl) + ", " +
                str(exact_k) + ", subs], subs]")
            bell_dict[(no_cl, exact_k)] = partition_dict
            if prt: print("Bell computed", (no_cl, exact_k))
        if prt: print("partition_dict", partition_dict)
        partitions = []
        partition_p = []
        for partition in partition_dict:
            partitions.append(partition)
            partition_p.append(partition_dict[partition])
        if prt: print('partition_p:', partition_p)
        partition_p = [pk / sum(partition_p) for pk in partition_p]
        partition_p = np.array(partition_p)
        if prt: print('partition_p:', partition_p)
        chosen_partition = partitions[np.random.choice(
            (np.arange(0, len(partitions))), p=partition_p)]
        if prt: print('chosen partition:')
        if prt: print(chosen_partition)
        # Transform to actual partition
        part = []
        for i in range(len(chosen_partition)):
            part += ([i + 1] * chosen_partition[i])
        chosen_partition = part
        if prt: print('chosen partition:')
        if prt: print(chosen_partition)
        '''Color each block in the partition randomly without replacement'''
        # Choose the colors to be used
        chosen_colors = np.random.choice((np.arange(1, no_colors + 1)),
                                         len(chosen_partition),
                                         replace=False)
        # Choose a random permutation of the given word
        color_arr = []
        for i in range(len(chosen_partition)):
            color_arr += [chosen_colors[i]] * chosen_partition[i]
        color_arr = np.array(color_arr)
        color_arr = np.random.permutation(color_arr)
        if prt: print('colors for clusters:')
        if prt: print(color_arr)
        '''Color each cluster with the assigned color'''
        i = 0
        for root in cluster_constraints:
            # cluster_constraints also holds entries keyed by the integer
            # cluster index (see above); only string site keys are roots.
            if not isinstance(root, str):
                continue
            cluster_color = color_arr[i]
            for site_str in uf.component(root):
                site = str2site(site_str)
                config[site[0], site[1]] = cluster_color
            i += 1

    # Case with field and gamma > 0 or case with gamma = 0
    else:
        '''Randomly sample a color for each cluster'''
        if prt: print('Case with field and gamma > 0 or case with gamma = 0')
        config = brute_force_sample(cluster_constraints, cls, uf, config)

    return config, uf, cluster_constraints
Example #7
def remove_duplicates_instance_to_mask(mask,
                                       class_ids,
                                       score,
                                       PX_TH=20,
                                       SC_TH=0.3):

    mask_resize = mask[::5, ::5, :]
    iou_matrix = compute_iou_masksets_partial(mask_resize, mask_resize)

    uf = UnionFind(list(range(mask.shape[2])))
    overlap = []
    for i in range(mask.shape[2]):
        for j in range(mask.shape[2]):
            if i == j:
                continue
            # Merge instances of the same class whose masks overlap heavily.
            if iou_matrix[i, j] > 0.8 and class_ids[i] == class_ids[j]:
                uf.union(i, j)
                overlap.append(i)
                overlap.append(j)

    overlap = np.unique(overlap)

    # Instances that were never merged are kept unchanged.
    keep = [n for n in range(iou_matrix.shape[0]) if n not in overlap]
    # print('keep', keep)

    mask_instance_new = mask[:, :, keep]
    class_ids_new = list(class_ids[keep])
    score_new = list(score[keep])

    # Only sets that actually merged two or more instances matter here.
    merged_sets = [pair for pair in uf.components() if len(pair) >= 2]

    mask_instance_merged = np.zeros(
        [mask.shape[0], mask.shape[1],
         len(merged_sets)], dtype=bool)

    for n, pair in enumerate(merged_sets):
        # mask_instance_merged is zero-initialized above; for each merged
        # set, keep the mask of its highest-scoring instance.
        scores_this_set = []
        index_this_set = []
        class_id_this_set = []
        px_num_this_set = []
        for p in pair:
            scores_this_set.append(score[p])
            index_this_set.append(p)
            class_id_this_set.append(class_ids[p])
            px_num_this_set.append(np.sum(mask[:, :, p], axis=(0, 1)))

        index = np.argmax(np.array(scores_this_set))
        mask_instance_merged[:, :, n] = mask[:, :, index_this_set[index]]

        class_ids_new.append(class_id_this_set[index])
        score_new.append(scores_this_set[index])

    # print('before', mask_instance_new.shape)
    mask_instance_new = np.dstack((mask_instance_new, mask_instance_merged))
    # mask_instance_new = mask_instance_merged
    class_ids_pred = np.array(class_ids_new)
    scores_pred = np.array(score_new)

    # print('after', mask_instance_new.shape)

    n_px_per_instance = np.sum(mask_instance_new, axis=(0, 1))
    instance_keep = np.where(
        np.logical_and((n_px_per_instance > PX_TH), (scores_pred > SC_TH)))[0]
    if len(instance_keep) == 0:
        return None, None
    # print(instance_keep)
    instance_reorder = instance_keep[np.argsort(scores_pred[instance_keep])]
    # print(instance_reorder)
    score_reorder = scores_pred[instance_reorder]
    class_ids_reorder = class_ids_pred[instance_reorder]
    mask_reorder = mask_instance_new[:, :, instance_reorder]
    mask_reorder = remove_disconnected(mask_reorder)
    # print(mask_reorder.shape)
    # mask_reorder = fill_and_remove(mask_reorder)
    mask, instance_score = instance_to_mask(mask_reorder,
                                            class_ids_reorder,
                                            score_reorder,
                                            order_by_score=False)

    return mask, instance_score
Example #8
# mn and silhouette_score are used below but not defined in this snippet;
# the standard SciPy / scikit-learn names fit the calls made.
import numpy as np
from scipy.stats import multivariate_normal as mn
from sklearn.metrics import silhouette_score


class GMM:
    def __init__(self, n_clusters, n_steps, eps=1e-20):
        self.n_clusters = n_clusters
        self.n_steps = n_steps
        self.eps = eps

    def _initialize(self):
        """
        Initializes self.alpha, self.mu, self.sigma, self.w
        """
        self.alpha = np.ones((self.n_clusters)) / self.n_clusters
        self.mu = self.X[np.random.choice(np.arange(self.n), self.n_clusters)]
        self.sigma = np.ones((self.n_clusters, self.d))
        self.chunklet_w = np.zeros((self.n_chunklets, self.n_clusters))

        #centers = init_centers(X, self.n_clusters)
        #dists = cdist(X, centers)
        #labels = np.argmin(dists, axis=1)

        #unq_labels, self.alpha = np.unique(labels, return_counts=True)

        #self.alpha = np.zeros(self.n_clusters)
        #self.mu = np.zeros((self.n_clusters, d))
        # Using diagonal variance
        #self.sigma = np.zeros((self.n_clusters, d))

        # for i, lbl in enumerate(unq_labels):
        #    cur_pts = np.where(labels == lbl)
        #    self.alpha[i] = cur_pts[0].shape[0]
        #    # initialize means
        #    self.mu[i, :] = np.mean(X[cur_pts], axis=0)

        #    centered = (X[cur_pts] - self.mu[i])**2
        #    centered = np.sum(centered, axis=0) / centered.shape[0]
        #    # initialize vars
        #    self.sigma[i, :] = self.alpha[i] * centered

        #self.alpha /= n

        # self._validate_sigma()

        #self.chunklet_w = np.zeros((self.chunklets.shape[0], self.n_clusters))

    def _transitive_closure(self):
        self.uf = UnionFind(np.arange(self.n))
        for link in self.ml:
            self.uf.union(link[0], link[1])
        # Components have different sizes, so keep chunklets as a plain
        # list (a ragged ndarray would need dtype=object in modern NumPy).
        self.chunklets = [np.array(list(c)) for c in self.uf.components()]
        self.n_chunklets = len(self.chunklets)
        self.chunklet_shapes = np.array([i.shape[0] for i in self.chunklets])
        self.chunklet_shapes = self.chunklet_shapes.reshape(-1, 1)
        self.chunklet_means = np.array(
            [np.mean(self.X[i], axis=0) for i in self.chunklets])
        assert self.chunklet_means.shape == (self.n_chunklets, self.d)

    def fit(self, X, ml):
        self.n = X.shape[0]
        self.d = X.shape[1]
        self.X = X.copy()
        self.ml = ml.copy()

        self._transitive_closure()
        self._initialize()

        self.scores = []
        self.lls = []

        for step in range(self.n_steps):
            self.e_step()
            self.m_step()
            self.scores.append(self.score())
            self.lls.append(self.ll)
            print(f"Step {step+1} :: LL {self.ll} :: Score {self.scores[-1]}")
            if len(self.lls) >= 2 and np.abs(self.lls[-1] - self.lls[-2]) < 1e-2:
                print("Converged")
                break

    def get_labels(self):
        chunk_labels = np.argmax(self.chunklet_w, axis=1).astype(int)
        labels = np.zeros(self.n)
        for i, chunk in enumerate(self.chunklets):
            labels[chunk] = chunk_labels[i]
        return labels.astype(int)

    def llhood(self):
        ll = 0
        for i, chunklet in enumerate(self.chunklets):
            for j in range(self.n_clusters):
                numerator = mn.pdf(
                    self.X[chunklet], self.mu[j], np.diag(self.sigma[j]))
                ll += np.sum(np.log(numerator + self.eps), axis=0) *\
                        self.chunklet_w[i,j]
                ll += np.log(self.alpha[j] + self.eps) * self.chunklet_w[i,j]
        return ll

    def e_step(self):
        self.ll = 0

        for i, chunklet in enumerate(self.chunklets):
            denominator = 0
            numerators = []
            for j in range(self.n_clusters):
                numerator = mn.pdf(
                    self.X[chunklet], self.mu[j], np.diag(self.sigma[j]))

                self.ll += np.sum(np.log(numerator + self.eps), axis=0) *\
                        self.chunklet_w[i,j]
                self.ll += np.log(self.alpha[j] + self.eps) *\
                        self.chunklet_w[i,j]

                numerator = np.prod(numerator, axis=0)
                numerator *= self.alpha[j]
                denominator += numerator
                self.chunklet_w[i, j] = numerator
            self.chunklet_w[i, :] /= (denominator + self.eps)
            #assert np.abs(self.chunklet_w[i, :].sum() - 1) < eps,\
            #    np.abs(self.chunklet_w[i, :].sum())

    def m_step(self):
        self.alpha = self.chunklet_w.sum(axis=0) / self.n_chunklets

        for j in range(self.n_clusters):
            # Weight each chunklet by its responsibility for cluster j
            # times its size; den normalizes the weighted means.
            numfrac = self.chunklet_w[:, j, np.newaxis] * self.chunklet_shapes
            den = np.sum(numfrac, axis=0, keepdims=True)
            temp_mu = np.sum(self.chunklet_means * numfrac, axis=0)
            self.mu[j] = temp_mu / den

            diff_sq = (self.X - self.mu[j])**2
            temp_sigma = np.zeros((1, self.d))
            for i in range(self.n_chunklets):
                # calc sigmanew
                signew = diff_sq[self.chunklets[i]]
                signew = np.sum(signew, axis=0, keepdims=True)
                signew /= self.chunklet_shapes[i]
                temp_sigma += signew * numfrac[i]

            self.sigma[j] = temp_sigma / den

    def score(self):
        labels = self.get_labels()
        return silhouette_score(self.X, labels)
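
A minimal usage sketch, assuming the imports added above and the stand-in UnionFind from Example #1; the data and must-link pairs here are made up for illustration:

import numpy as np

rng = np.random.default_rng(0)
# Two well-separated 2-D blobs plus two must-link constraints.
X = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])
ml = np.array([[0, 1], [50, 51]])  # each pair is forced into one chunklet

gmm = GMM(n_clusters=2, n_steps=25)
gmm.fit(X, ml)
print(gmm.get_labels()[:5], gmm.scores[-1])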
Example #9
import logging

from tqdm import tqdm


def gen_model(dataset_name, ignore_wo_url=False, ignore_replies=False):
    event_data, missing_urls_amount = load_data(dataset_name)

    ##########
    # create set of tweet_ids
    # for a given tweet t:
    # if t does not have urls: add a tweet_id {t.id}_0
    # for each url_i in t: add a tweet_id {t.id}_{i}
    # for each url_i in t: add a tweet_id {t.reply_id}_{i}
    ##########
    tweet_ids = set()
    logging.info("create list of tweet_ids")

    for tweet_id, tweet in tqdm(event_data.items(), total=len(event_data)):
        added = False
        if not tweet.expanded_urls:
            if not ignore_wo_url:
                tweet_ids.add(f'{tweet_id}_0')
                added = True
        else:
            for i, url in enumerate(tweet.expanded_urls.values()):
                tweet_ids.add(f'{tweet_id}_{i}')
                added = True

        if added and tweet.reply_id != 'NULL':
            if tweet.reply_id in event_data and not ignore_replies:
                for i, url in enumerate(tweet.expanded_urls.values()):
                    tweet_ids.add(f'{tweet.reply_id}_{i}')

    ##########
    # for each tweet_id in the set of tweet_ids
    # add a pair
    ##########
    logging.info(
        "create pairs (t, u) or (t, t') for each tweet t and url u or replied/retweeted tweet t'"
    )
    replies_amount = 0
    retweets_amount = 0
    quotes_amount = 0
    missing_replies_amount = 0
    pairs = []

    for tweet_id in tweet_ids:
        frags = tweet_id.split('_')
        o_tweet_id = frags[0]
        i = int(frags[1])

        tweet = event_data[o_tweet_id]

        url = tweet.expanded_urls.get(i)
        if url:
            pairs.append((tweet_id, url))

        # retweets ARE considered, since they are exact text copies of the retweeted tweet
        if tweet.retweet_id != 'NULL':
            retweets_amount += 1
        if tweet.quote_id != 'NULL':
            quotes_amount += 1
        if tweet.reply_id != 'NULL':
            replies_amount += 1

            if tweet.reply_id in event_data:
                if not ignore_replies:
                    ## TODO: is this right?
                    pairs.append((tweet_id, f'{tweet.reply_id}_{i}'))
            else:
                missing_replies_amount += 1

    logging.info(
        f'total pairs: {len(pairs)}, retweets: {retweets_amount}, quotes: {quotes_amount}, replies: {replies_amount} '
        f'(missing: {missing_replies_amount}, missing urls: {missing_urls_amount})'
    )

    ##########
    """
        all keys must be the same time (in this case, strings);
        unionfind will vectorize operations and will cast everything in the array to the same type,
        so if there are integers and strings, it will cast everything to string and comparisons will fail
        when calling uf.components().
    """

    logging.info('applying union-find')
    uf = UnionFind()
    for u, v in pairs:
        uf.union(u, v)
    logging.info(f'total components: {len(uf.components())}')
    logging.info('\n')

    return {'components': uf.components(), 'event_data': event_data}
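
The type note above is easy to trip over. The dict-based stand-in from Example #1 happens to tolerate mixed key types, but for the vectorizing library that comment describes, the safe pattern is to normalize every key to str before calling union, as in this toy sketch:

uf = UnionFind()
pairs = [(123, 'http://example.com/a'), ('456_0', 'http://example.com/a')]
for u, v in pairs:
    uf.union(str(u), str(v))  # never mix int and str keys
print(uf.components())  # [{'123', '456_0', 'http://example.com/a'}]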