Exemplo n.º 1
0
def test_general():
    """Smoke-test the low-level graph wrapper and a small integer graph cut.

    Creates a general graph through ``pygco.gco``, prints its handle and
    destroys it again, then calls ``pygco.cut_general_graph`` with a (4, 3)
    integer unary array and three edges (0-1, 1-2, 2-3) and prints the
    returned value.
    """
    # NOTE(review): n_sites and n_labels are not defined inside this function;
    # presumably they are module-level names -- confirm against the full file.
    gc = pygco.gco()
    gc.createGeneralGraph(n_sites, n_labels, True)
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print(gc.handle)
    gc.destroyGraph()

    u = np.array([[2, 8, 8], [7, 3, 7], [8, 8, 2], [6, 4, 6]], dtype=np.intc)
    e = np.array([[0, 1], [1, 2], [2, 3]], dtype=np.intc)
    ew = np.array([3, 10, 1], dtype=np.intc)
    # Zero on the diagonal, one elsewhere.
    s = (1 - np.eye(3)).astype(np.intc)

    print(pygco.cut_general_graph(e, ew, u, s, n_iter=1))
Exemplo n.º 2
0
def test_float():
    """Run a swap-based graph cut with floating-point costs and print it.

    Builds a 3x3 float unary array, three edges (0-1, 1-2, 0-2) with float
    weights and a pairwise matrix that is zero on the diagonal and one
    elsewhere, then prints the value returned by
    ``pygco.cut_general_graph``.
    """
    unary_cost = np.array([[0.0, 1.0, 2.0],
                           [4.0, 1.0, 0.0],
                           [0.0, 1.0, 2.0]])
    edges = np.array([[0, 1],
                      [1, 2],
                      [0, 2]]).astype(np.int32)
    pairwise_cost = np.array([[0.0, 1.0, 1.0],
                              [1.0, 0.0, 1.0],
                              [1.0, 1.0, 0.0]])
    edge_weights = np.array([2.0, 0.0, 0.0])

    # Single-argument print() behaves identically on Python 2 and Python 3.
    print(pygco.cut_general_graph(edges, edge_weights, unary_cost,
                                  pairwise_cost, n_iter=-1, algorithm="swap"))
Exemplo n.º 3
0
def test_float():
    """Print the result of a swap-based graph cut on float-valued costs.

    The inputs are a 3x3 float unary array, three edges (0-1, 1-2, 0-2)
    with float weights, and a pairwise matrix with zeros on the diagonal
    and ones elsewhere.
    """
    unary_cost = np.array([[0.0, 1.0, 2.0], [4.0, 1.0, 0.0], [0.0, 1.0, 2.0]])
    edges = np.array([[0, 1], [1, 2], [0, 2]]).astype(np.int32)
    pairwise_cost = np.array([[0.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                              [1.0, 1.0, 0.0]])
    edge_weights = np.array([2.0, 0.0, 0.0])

    labels = pygco.cut_general_graph(edges,
                                     edge_weights,
                                     unary_cost,
                                     pairwise_cost,
                                     n_iter=-1,
                                     algorithm="swap")
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print(labels)
Exemplo n.º 4
0
def test_general():
    """Exercise the raw graph wrapper, then a small integer graph cut.

    A general graph is created via ``pygco.gco``, its handle is printed and
    the graph is destroyed. Afterwards ``pygco.cut_general_graph`` is called
    with a (4, 3) integer unary array and the edges 0-1, 1-2, 2-3, and the
    returned value is printed.
    """
    # NOTE(review): n_sites and n_labels are not defined inside this function;
    # presumably they are module-level names -- confirm against the full file.
    gc = pygco.gco()
    gc.createGeneralGraph(n_sites, n_labels, True)
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print(gc.handle)
    gc.destroyGraph()

    u = np.array([
        [2, 8, 8],
        [7, 3, 7],
        [8, 8, 2],
        [6, 4, 6],
    ], dtype=np.intc)
    e = np.array([
        [0, 1],
        [1, 2],
        [2, 3],
    ], dtype=np.intc)
    ew = np.array([3, 10, 1], dtype=np.intc)
    # Zero on the diagonal, one elsewhere.
    s = (1 - np.eye(3)).astype(np.intc)

    print(pygco.cut_general_graph(e, ew, u, s, n_iter=1))
Exemplo n.º 5
0
def test_integer():
    """Check the labelling returned for a small graph with integer unaries.

    Uses a (4, 3) unary array, the edges 0-1, 1-2, 2-3 with weights
    3, 10, 1, and a pairwise matrix that is zero on the diagonal and one
    elsewhere; a single iteration is requested (``n_iter=1``).
    """
    unary = np.array([[2, 8, 8],
                      [7, 3, 7],
                      [8, 8, 2],
                      [6, 4, 6]])
    edges = np.array([[0, 1], [1, 2], [2, 3]])
    edge_weight = np.array([3, 10, 1])
    smooth = 1 - np.eye(3)

    labels = pygco.cut_general_graph(edges, edge_weight, unary, smooth,
                                     n_iter=1)
    # A bare np.array_equal(...) call discards its result and can never fail
    # the test, so the comparison has to be asserted.
    assert np.array_equal(labels, np.array([0, 2, 2, 1]))
Exemplo n.º 6
0
def test_integer():
    """Verify the graph-cut labelling for integer unary costs.

    The graph has a (4, 3) unary array and edges 0-1, 1-2, 2-3 weighted
    3, 10 and 1. The pairwise matrix is ``1 - eye(3)`` and one iteration is
    requested (``n_iter=1``).
    """
    unary = np.array([[2, 8, 8],
                      [7, 3, 7],
                      [8, 8, 2],
                      [6, 4, 6]])
    edges = np.array([[0, 1], [1, 2], [2, 3]])
    edge_weight = np.array([3, 10, 1])
    smooth = 1 - np.eye(3)

    labels = pygco.cut_general_graph(edges, edge_weight, unary, smooth,
                                     n_iter=1)
    expected = np.array([0, 2, 2, 1])
    # Assert the comparison; calling np.array_equal without using its result
    # would let any labelling pass.
    assert np.array_equal(labels, expected)
Exemplo n.º 7
0
def test_float():
    """Check the swap-based labelling of a small graph with float costs.

    Uses a 3x3 float unary array, the edges 0-1, 1-2, 0-2 with weights
    2.0, 0.0, 0.0, and a float pairwise matrix that is zero on the diagonal
    and one elsewhere; the solver is run with ``n_iter=-1`` and
    ``algorithm="swap"``.
    """
    unary = np.array([[0.0, 1.0, 2.0],
                      [4.0, 1.0, 0.0],
                      [1.0, 0.0, 2.0]])
    edges = np.array([[0, 1],
                      [1, 2],
                      [0, 2]]).astype(np.int32)
    # np.float was only an alias of the builtin float and no longer exists in
    # recent NumPy releases; the builtin yields the same float64 dtype.
    smooth = (1 - np.eye(3)).astype(float)
    edge_weights = np.array([2.0, 0.0, 0.0])

    labels = pygco.cut_general_graph(edges, edge_weights, unary, smooth,
                                     n_iter=-1, algorithm="swap")
    # A bare np.array_equal(...) call discards its result and can never fail
    # the test, so the comparison has to be asserted.
    assert np.array_equal(labels, np.array([0, 2, 1]))
Exemplo n.º 8
0
def test_float():
    """Verify the labelling produced by the swap algorithm on float costs.

    The graph has a 3x3 float unary array and edges 0-1, 1-2, 0-2 weighted
    2.0, 0.0 and 0.0. The pairwise matrix is ``1 - eye(3)`` as floats, and
    the solver is called with ``n_iter=-1`` and ``algorithm="swap"``.
    """
    unary = np.array([[0.0, 1.0, 2.0],
                      [4.0, 1.0, 0.0],
                      [1.0, 0.0, 2.0]])
    edges = np.array([[0, 1],
                      [1, 2],
                      [0, 2]]).astype(np.int32)
    # Use the builtin float: the np.float alias has been removed from NumPy
    # and raises AttributeError there; the resulting dtype is float64 either
    # way.
    smooth = (1 - np.eye(3)).astype(float)
    edge_weights = np.array([2.0, 0.0, 0.0])

    labels = pygco.cut_general_graph(edges, edge_weights, unary, smooth,
                                     n_iter=-1, algorithm="swap")
    expected = np.array([0, 2, 1])
    # Assert the comparison; calling np.array_equal without using its result
    # would let any labelling pass.
    assert np.array_equal(labels, expected)
Exemplo n.º 9
0
def unary_pairwise_predict_single_image(unary, edges, edge_weights, pw_weight):
    """
    Run graph cuts to make predictions for segmentation of a single image.
    The pairwise model is a Potts model.

    unary: N*K, N is the number of pixels, K is the number of classes
    edges, edge_weights: pairwise potentials generated by one of the get_pw 
        functions, or in the same format.
    pw_weight: a parameter for relative importance of pairwise potentials
        compared to unary potentials.

    return: y, a N-d label vector
    """
    # Size the Potts matrix from the number of classes in `unary` rather than
    # hard-coding 2, so the K x K pairwise term matches an N x K unary for
    # any K. For K == 2 this is exactly the matrix used before.
    n_classes = np.shape(unary)[1]
    potts = pw_weight * (1 - np.eye(n_classes))
    return pygco.cut_general_graph(edges, edge_weights, unary, 
            potts, n_iter=-1, algorithm='expansion')
Exemplo n.º 10
0
def unary_pairwise_predict_single_image(unary, edges, edge_weights, pw_weight):
    """
    Run graph cuts to make predictions for segmentation of a single image.
    The pairwise model is a Potts model.

    unary: N*K, N is the number of pixels, K is the number of classes
    edges, edge_weights: pairwise potentials generated by one of the get_pw 
        functions, or in the same format.
    pw_weight: a parameter for relative importance of pairwise potentials
        compared to unary potentials.

    return: y, a N-d label vector
    """
    # Build a K x K Potts matrix (zero on the diagonal, pw_weight elsewhere)
    # from the class dimension of `unary` instead of assuming two classes.
    # With K == 2 the matrix is identical to the previous hard-coded one.
    num_labels = np.shape(unary)[1]
    pairwise = pw_weight * (1 - np.eye(num_labels))
    return pygco.cut_general_graph(edges,
                                   edge_weights,
                                   unary,
                                   pairwise,
                                   n_iter=-1,
                                   algorithm='expansion')
Exemplo n.º 11
0
def detect_paraphrase(k=.8,
                      clutter=50,
                      region_num=5,
                      split='val',
                      data_path=None,
                      max_n=None,
                      saveto=None,
                      verbose=False,
                      visual=False):
    '''
    Detect paraphrases per image with a graph cut and score the detections.

    For every image a ParaphraseDetector builds a phrase graph, the graph is
    labelled with pygco (swap algorithm, initialised from the per-node unary
    argmin), and every pair of phrases sharing a label that passes
    ``iscandidates`` is counted as a detected paraphrase. Detections are
    compared against the rows of the alignment table for that image.

    k: float. parameter to balance the data term and the smooth term
        (the pairwise cost matrix is multiplied by k).
    clutter: float in range of [0,100]. control the number of clusters. larger number results in more clusters.
    region_num: forwarded to ParaphraseDetector.
    split: selects which '<data_path>/alignment_<split>.csv' and
        '<data_path>/entity_region_scores_<split>.csv' files are read. For
        'test' the image ids come from '<data_path>/filtered_test_id.txt',
        otherwise from the unique values of the alignment table's ``image``
        column.
    data_path: directory containing the files above. Must be given; the
        default None fails on the path concatenation.
    max_n: only the first max_n images are processed (None means all).
    saveto: if not None, the per-pair results are written to this CSV path.
    verbose: if true, print the counts and scores.
    visual: forwarded to ParaphraseDetector; when true, ground-truth pairs
        whose region is 0 are not counted.

    return: (f1, prec, rec). A score whose denominator is zero (no
        predictions, no ground-truth pairs, or no true positives) is
        reported as 0.0.
    '''

    t2t_df = pd.read_csv(data_path + '/alignment_%s.csv' % split)
    t2r_df = pd.read_csv(data_path + '/entity_region_scores_%s.csv' % split)

    if split == 'test':
        with open(data_path + '/filtered_test_id.txt', 'r') as f:
            img_list = [int(line.strip()) for line in f]
    else:
        img_list = t2t_df.image.unique()

    # test first max_n images
    img_list = img_list[:max_n]

    result = []

    # setup graph
    bar = progressbar.ProgressBar()

    N_gt = 0  # number of ground truth paraphrases

    for img in bar(img_list):
        p_detector = ParaphraseDetector(img,
                                        t2t_df,
                                        t2r_df,
                                        region_num,
                                        clutter=clutter,
                                        visual=visual)
        Gtw = p_detector.get_phrase_omega_graph()
        edges, weights, unary, pairwise, node2int = p_detector.cvrt2gco(Gtw)
        init_labels = unary.argmin(axis=1)

        # optimize
        if edges.size:  # at least one edge between phrases: run the graph cut
            labels = pygco.cut_general_graph(edges,
                                             weights,
                                             unary,
                                             k * pairwise,
                                             init_labels=init_labels,
                                             n_iter=-1,
                                             algorithm='swap')
        else:  # no edges between any phrases: keep the unary argmin
            labels = init_labels

        detected_phrase = []
        for label in np.unique(labels):
            nodes = [n for n in node2int if labels[node2int[n]] == label]
            detected = p_detector.iscandidates(combinations(
                nodes, 2))  # remove phrase pair from the same sentence
            detected_phrase += detected

        # check if each detected paraphrase is correct
        df = t2t_df.query('image == %i' % img)

        for phrase_pair in detected_phrase:
            # The pair may be stored in either order, so look it up both ways.
            res = df.query('(phrase1 == """%s""") and (phrase2 == """%s""")' %
                           phrase_pair)
            occurrence = len(res)
            gt_count = sum(res.region1 == res.region2)

            res = df.query('(phrase2 == """%s""") and (phrase1 == """%s""")' %
                           phrase_pair)
            occurrence += len(res)
            gt_count += sum(res.region1 == res.region2)

            result.append({
                'image': img,
                'phrase': phrase_pair,
                'occurrence': occurrence,
                'score': gt_count
            })

        # count ground truth paraphrase
        if visual:
            N_gt += len(
                df.query(
                    '(phrase1 != phrase2) and (region1 == region2) and (region1 != 0) and (region2 != 0)'
                ))
        else:
            N_gt += len(
                df.query('(phrase1 != phrase2) and (region1 == region2)'))

    # compute evaluation scores
    N_ans = sum(r['occurrence'] for r in result)
    N_tp = sum(r['score'] for r in result)
    # Guard every denominator so that an empty prediction set, an empty
    # ground truth or zero true positives yields 0.0 instead of a
    # ZeroDivisionError (or NaN).
    prec = N_tp * 1. / N_ans if N_ans else 0.
    rec = N_tp * 1. / N_gt if N_gt else 0.
    f1 = 2. * (prec * rec) / (prec + rec) if (prec + rec) else 0.

    if verbose:
        print('No. prediction:', N_ans, ', No. true positive', N_tp,
              ', No. gt positives', N_gt)
        print('prec = %.4f rec = %.4f f1 = %.4f' % (prec, rec, f1))

    if saveto is not None:
        res_df = pd.DataFrame({
            'image': [r['image'] for r in result],
            'phrase1': [r['phrase'][0] for r in result],
            'phrase2': [r['phrase'][1] for r in result],
            'occurrence': [r['occurrence'] for r in result],
            'score': [r['score'] for r in result]
        })

        res_df.to_csv(saveto)

    return f1, prec, rec
def detect_paraphrase(phrase_pair_file,
                      graph_dir,
                      p2r_dir,
                      rfeat_dir,
                      p2r_method,
                      r2r_method,
                      top_k=5,
                      clutter=90,
                      damping=0.5,
                      k=.1,
                      thresh=.58,
                      saveto=None,
                      wo_phrase_connection=False):
    '''
    Label the phrases of every graph file and score paraphrase predictions.

    For each file in graph_dir the phrase graph is loaded, low-weight edges
    are dropped, regions are clustered, and each phrase gets a cluster label
    either from the unary argmin or from a pygco swap graph cut. Two phrases
    of a pair are predicted to be paraphrases when they receive the same
    label.

    phrase_pair_file: CSV read with pandas; the columns ``image``,
        ``phrase1``, ``phrase2`` and ``ytrue`` are used.
    graph_dir: directory of JSON files with the keys 'edges', 'weights' and
        'phrases'. File names are assumed to look like '<image id>.json'
        (the id is parsed with int(fn[:-5])) -- TODO confirm.
    p2r_dir, rfeat_dir: directories holding a '.npy' file per graph file,
        named after the graph file with the last four characters replaced
        by 'npy'.
    p2r_method: not used by this function.
    r2r_method: passed as ``metric`` to get_region2label_table.
    top_k: passed to preprocess_Mp2r.
    clutter, damping: passed to get_region2label_table.
    k: factor applied to the edge weights given to the graph cut.
    thresh: edges whose rescaled weight (w / 2 + 0.5) is not greater than
        thresh are removed.
    saveto: if truthy, (image, phrase, label) rows are written to this CSV.
    wo_phrase_connection: if true, skip the graph cut and keep the unary
        argmin labels.

    return: (f1, prec, rec) computed over all phrase pairs.
    '''

    phrase_pair_df = pd.read_csv(phrase_pair_file, encoding="utf-8")

    bar = progressbar.ProgressBar()
    y_true = []
    y_pred = []
    items = []

    for fn in bar(os.listdir(graph_dir)):
        # Use a context manager so the graph file is closed right away
        # instead of leaking one handle per iteration.
        with open(os.path.join(graph_dir, fn)) as graph_file:
            graph_data = json.load(graph_file)
        edges = np.asarray(graph_data['edges'])
        weights = np.asarray(graph_data['weights'])
        # presumably maps weights from [-1, 1] to [0, 1] -- TODO confirm
        weights = weights / 2. + .5
        phrase = graph_data['phrases']

        # preprocess graph: keep only edges above the threshold
        keep = weights > thresh
        edges = edges[keep]
        weights = weights[keep]

        Mp2r = np.load(os.path.join(p2r_dir, fn[:-4] + 'npy'))
        Xr = np.load(os.path.join(rfeat_dir, fn[:-4] + 'npy'))

        Mp2r, smpl = preprocess_Mp2r(Mp2r, top_k)
        Xr = Xr[smpl]
        # L2-normalise each row of the selected region features.
        Xr /= np.linalg.norm(Xr, axis=1, keepdims=True)

        # cluster regions
        Tcr = get_region2label_table(Xr, clutter, damping, metric=r2r_method)

        # p(l | i)
        # Table. size N_label x N_phrase
        Tli = np.dot(Tcr, Mp2r.T)

        unary = -np.log(Tli.T)
        pairwise = (1 - np.eye(unary.shape[-1]))

        # optimize labels
        init_labels = unary.argmin(axis=1)
        N_label = len(Tcr)

        if wo_phrase_connection:
            labels = init_labels
        elif (len(edges) > 0) and (N_label > 1):
            labels = pygco.cut_general_graph(edges,
                                             k * weights,
                                             unary,
                                             pairwise,
                                             n_iter=-1,
                                             algorithm='swap')
        else:  # if there is no edges between any phrases or only one cluster is found
            labels = init_labels

        # eval results
        image_id = int(fn[:-5])
        p2i = {p: i for i, p in enumerate(phrase)}
        sub_df = phrase_pair_df[phrase_pair_df.image == image_id]

        for p in phrase:
            items.append((image_id, p, labels[p2i[p]]))

        for _, row in sub_df.iterrows():
            p1 = row['phrase1']
            p2 = row['phrase2']
            y_true.append(row['ytrue'])
            y_pred.append(labels[p2i[p1]] == labels[p2i[p2]])

    if saveto:
        res_df = pd.DataFrame(items, columns=['image', 'phrase', 'label'])
        res_df.to_csv(saveto)

    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return f1, prec, rec