Exemplo n.º 1
0
def sandwich_demo():
    """Plot the sandwich data in input space and under four learned metrics.

    The top row of the figure shows the raw data together with its
    2-nearest-neighbor graph.  The four panels below show the same data after
    fitting and transforming with LMNN, ITML, SDML and LSML, each with its own
    2-nearest-neighbor graph.  The figure is displayed with ``pyplot.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)
    ax = pyplot.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(x, y, ax)
    plot_neighborhood_graph(x, knn, y, ax)
    ax.set_title('input space')
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    num_constraints = 60
    # Each entry pairs a learner with the positional arguments of its fit().
    mls = [(LMNN(), (x, y)),
           (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
           (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
           (LSML(), (x, LSML.prepare_constraints(y, num_constraints)))]

    # Slots 3..6 of the 3x2 grid are the two rows under the top panel.
    # enumerate() is used instead of xrange(), which only exists on Python 2.
    for ax_num, (ml, args) in enumerate(mls, 3):
        ml.fit(*args)
        tx = ml.transform()
        ml_knn = nearest_neighbors(tx, k=2)
        ax = pyplot.subplot(3, 2, ax_num)
        plot_sandwich_data(tx, y, ax)
        plot_neighborhood_graph(tx, ml_knn, y, ax)
        ax.set_title('%s space' % ml.__class__.__name__)
        ax.set_xticks([])
        ax.set_yticks([])
    pyplot.show()
Exemplo n.º 2
0
def sdml_fit(samples, similarity_set, prior='covariance', balance_param=0.15):
    """Fit an SDML model on every ordered pair of sample indices.

    ``samples`` is handed to SDML as its preprocessor, so the pairs given to
    ``fit`` are pairs of indices.  A pair ``(i, j)`` is labelled ``1`` when
    ``similarity_set[i, j]`` is truthy and ``-1`` otherwise.  The time spent in
    ``fit`` is printed, and the fitted estimator is returned.

    Prior can be 'covariance', 'identity' or 'random'.
    balance_param was 0.5 in the first version of the paper, but such a large
    value does not work here.
    """
    n_samples = len(similarity_set)

    model = SDML(prior=prior,
                 preprocessor=samples,
                 verbose=True,
                 balance_param=balance_param)

    # All n_samples ** 2 ordered index pairs, self-pairs included.
    index_pairs = [[i, j] for i in range(n_samples) for j in range(n_samples)]
    labels = [1 if similarity_set[i, j] else -1 for i, j in index_pairs]

    started_at = time()
    model.fit(index_pairs, labels)
    print("Fitting took {:.2f} seconds.".format(time() - started_at))

    return model
Exemplo n.º 3
0
 def test_sdml_converges_if_psd(self):
     """Checks that SDML yields a finite Mahalanobis matrix on a toy problem
     whose pseudo-covariance matrix is known to be PSD"""
     toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
     labels = [1, -1]
     model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
     model.fit(toy_pairs, labels)
     mahalanobis = model.get_mahalanobis_matrix()
     assert np.isfinite(mahalanobis).all()
Exemplo n.º 4
0
 def test_sdml_converges_if_psd(self):
   """Checks that SDML yields a finite Mahalanobis matrix on a toy problem
   whose pseudo-covariance matrix is known to be PSD"""
   toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   labels = [1, -1]
   model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
   model.fit(toy_pairs, labels)
   mahalanobis = model.get_mahalanobis_matrix()
   assert np.isfinite(mahalanobis).all()
Exemplo n.º 5
0
def update(request):
    """Re-learn a metric from the labelled pairs and render the clustering page.

    Reads the pair labels and the feature table from CSV files under
    ``settings.BASE_DIR``, builds a symmetric connectivity matrix over the
    labelled row ids, fits SDML on the matching feature rows, transforms the
    whole feature table and passes the result to ``clusteringAndTSNE``.

    Parameters
    ----------
    request
        Incoming request; ``request.session`` must contain the keys
        ``'clustering'`` and ``'num_cluster'``.

    Returns
    -------
    The value returned by ``render`` for the step-7 visualisation template.
    """
    df_label = pd.read_csv(
        os.path.join(settings.BASE_DIR, 'data/outcome_labels.csv'))
    print("df_label", '\n', df_label)

    df_data = pd.read_csv(
        os.path.join(settings.BASE_DIR, 'data/features_rep.csv'))

    # get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)
    # Position of every row id in rowIDLIst, so that the loop below does not
    # rescan the list for each label.
    row_pos = {rid: pos for pos, rid in enumerate(rowIDLIst)}

    # connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    # DataFrame.as_matrix() was removed in pandas 1.0; .values gives the same
    # array on old and new pandas versions.
    label_rows = df_label.values
    print("as_Matrix", '\n', label_rows)
    # NOTE(review): assumes the columns are ordered (id1, id2, label) - confirm.
    for lbl in label_rows:
        print("lbl", lbl)
        print("lbl[0]", lbl[0])
        print("lbl[1]", lbl[1])
        print("lbl[2]", lbl[2])
        i, j = row_pos[lbl[0]], row_pos[lbl[1]]
        print("rowIDLIst.index(lbl[0])", i,
              "rowIDLIst.index(lbl[1])", j)
        # The graph is undirected, so both triangles get the label.
        cmatrix[i][j] = int(lbl[2])
        cmatrix[j][i] = int(lbl[2])

    print("cmatrix", '\n', cmatrix)

    # One single-row frame per labelled id; ids are used as positional
    # indices into the feature table.
    trainedData = [df_data.iloc[[rid]] for rid in rowIDLIst]

    print("trainedData1", '\n', trainedData)

    trainedData = pd.concat(trainedData, axis=0).values

    print("trainedData2\n", trainedData)

    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_data)

    al_selection = request.session['clustering']
    num_clustering = request.session['num_cluster']

    clusteringAndTSNE(newData, al_selection, num_clustering)
    # context is a dict of html code, containing three types of features representation
    content = {'Title': "Step 7: Clustering Visualization", "listId": "li7"}
    return render(request, 'clustering/stp7-clu-visualisation.html', content)
Exemplo n.º 6
0
def test_verbose_has_not_installed_skggm_sdml(capsys):
  # Test that when skggm is not installed, fitting a verbose SDML prints a
  # message saying that scikit-learn's graphical lasso solver is used
  # TODO: remove if we don't need skggm anymore
  expected = "SDML will use scikit-learn's graphical lasso solver."
  toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
  labels = [1, -1]
  model = SDML(verbose=True)
  model.fit(toy_pairs, labels)
  captured_out, _ = capsys.readouterr()
  assert expected in captured_out
Exemplo n.º 7
0
def test_verbose_has_not_installed_skggm_sdml(capsys):
    # Test that when skggm is not installed, fitting a verbose SDML prints a
    # message saying that scikit-learn's graphical lasso solver is used
    # TODO: remove if we don't need skggm anymore
    expected = "SDML will use scikit-learn's graphical lasso solver."
    toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    labels = [1, -1]
    model = SDML(verbose=True)
    model.fit(toy_pairs, labels)
    captured_out, _ = capsys.readouterr()
    assert expected in captured_out
Exemplo n.º 8
0
  def test_iris(self):
    """Fits SDML on iris with a dense and a sparse constraint graph and
    checks that the class separation stays below 0.25."""
    n_points = self.iris_points.shape[0]
    np.random.seed(1234)
    W = SDML.prepare_constraints(self.iris_labels, n_points, 1500)

    # Test sparse graph inputs.
    graphs = (W, scipy.sparse.csr_matrix(W))
    for constraint_graph in graphs:
      model = SDML().fit(self.iris_points, constraint_graph)
      embedded = model.transform()
      csep = class_separation(embedded, self.iris_labels)
      self.assertLess(csep, 0.25)
Exemplo n.º 9
0
def update():
    """Re-learn a metric from the labelled pairs and re-run the clustering.

    Reads the pair labels and the feature table from CSV files under
    ``data/``, builds a symmetric connectivity matrix over the labelled row
    ids, fits SDML on the matching feature rows, transforms the whole feature
    table and passes the result to ``clusteringAndTSNE``.

    The progress messages use single-argument ``print(...)`` calls, which are
    valid on both Python 2 and Python 3 (the original print statements are a
    syntax error on Python 3).
    """
    df_label = pd.read_csv('data/outcome_labels.csv')
    print("df_label \n{}".format(df_label))

    df_data = pd.read_csv('data/features_rep.csv')
    print("df_data \n{}".format(df_data))

    # get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    print("rowIDLIst \n{}".format(rowIDLIst))
    # Position of every row id in rowIDLIst, so that the loop below does not
    # rescan the list for each label.
    row_pos = {rid: pos for pos, rid in enumerate(rowIDLIst)}

    # connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    # DataFrame.as_matrix() was removed in pandas 1.0; .values gives the same
    # array on old and new pandas versions.
    # NOTE(review): assumes the columns are ordered (id1, id2, label) - confirm.
    for lbl in df_label.values:
        i, j = row_pos[lbl[0]], row_pos[lbl[1]]
        # The graph is undirected, so both triangles get the label.
        cmatrix[i][j] = int(lbl[2])
        cmatrix[j][i] = int(lbl[2])

    print("cmatrixShape \n{}".format(cmatrix.shape))

    # One single-row frame per labelled id; ids are used as positional
    # indices into the feature table.
    trainedData = [df_data.iloc[[rid]] for rid in rowIDLIst]
    trainedData = pd.concat(trainedData, axis=0).values

    print("trainedData.shape \n{}".format(trainedData.shape))
    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_data)

    clusteringAndTSNE(newData)
Exemplo n.º 10
0
  def test_iris(self):
    """Fits SDML on iris with a dense and a sparse constraint graph and
    checks that the class separation stays below 0.25."""
    n_points = self.iris_points.shape[0]
    np.random.seed(1234)
    W = SDML.prepare_constraints(self.iris_labels, n_points, 1500)

    # Test sparse graph inputs.
    graphs = (W, scipy.sparse.csr_matrix(W))
    for constraint_graph in graphs:
      model = SDML().fit(self.iris_points, constraint_graph)
      embedded = model.transform()
      csep = class_separation(embedded, self.iris_labels)
      self.assertLess(csep, 0.25)
Exemplo n.º 11
0
    def test_iris(self):
        """Fits SDML on iris with a dense and a sparse constraint graph and
        checks that the class separation stays below 0.25."""
        n_points = self.iris_points.shape[0]
        # Note: this is a flaky test, which fails for certain seeds.
        # TODO: un-flake it!
        np.random.seed(5555)
        W = SDML.prepare_constraints(self.iris_labels, n_points, 1500)

        # Test sparse graph inputs.
        graphs = (W, scipy.sparse.csr_matrix(W))
        for constraint_graph in graphs:
            model = SDML().fit(self.iris_points, constraint_graph)
            embedded = model.transform()
            csep = class_separation(embedded, self.iris_labels)
            self.assertLess(csep, 0.25)
Exemplo n.º 12
0
  def test_iris(self):
    """Fits SDML on iris with a dense and a sparse constraint graph and
    checks that the class separation stays below 0.25."""
    n_points = self.iris_points.shape[0]
    # Note: this is a flaky test, which fails for certain seeds.
    # TODO: un-flake it!
    np.random.seed(5555)
    W = SDML.prepare_constraints(self.iris_labels, n_points, 1500)

    # Test sparse graph inputs.
    graphs = (W, scipy.sparse.csr_matrix(W))
    for constraint_graph in graphs:
      model = SDML().fit(self.iris_points, constraint_graph)
      embedded = model.transform()
      csep = class_separation(embedded, self.iris_labels)
      self.assertLess(csep, 0.25)
Exemplo n.º 13
0
    def test_sdml_raises_warning_msg_installed_skggm(self):
        """Tests that the right error message is raised when SDML is fitted
        with skggm's graphical lasso solver and that solver fails"""
        # TODO: remove if we don't need skggm anymore
        expected = ("There was a problem in SDML when using skggm's graphical "
                    "lasso solver.")
        # toy case on which skggm's graphical lasso is known to fail
        # because it returns non finite values
        toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
        labels = [1, -1]
        model = SDML(use_cov=False, balance_param=100, verbose=True)

        with pytest.raises(RuntimeError) as excinfo:
            model.fit(toy_pairs, labels)
        assert expected == str(excinfo.value)
Exemplo n.º 14
0
 def test_raises_no_warning_installed_skggm(self):
   """Checks that fitting SDML and SDML_Supervised raises no error and emits
   no ConvergenceWarning.

   Warnings are recorded with ``warnings.catch_warnings(record=True)``
   because ``pytest.warns(None)`` is deprecated since pytest 7 and rejected
   by pytest 8.
   """
   import warnings  # local import keeps this block self-contained

   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
   X, y = make_classification(random_state=42)
   with warnings.catch_warnings(record=True) as records:
     # record every warning, including ones already emitted once elsewhere
     warnings.simplefilter("always")
     sdml = SDML(prior='covariance')
     sdml.fit(pairs, y_pairs)
   for record in records:
     assert record.category is not ConvergenceWarning
   with warnings.catch_warnings(record=True) as records:
     warnings.simplefilter("always")
     sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5)
     sdml_supervised.fit(X, y)
   for record in records:
     assert record.category is not ConvergenceWarning
Exemplo n.º 15
0
  def test_sdml_raises_warning_msg_installed_skggm(self):
    """Tests that the right error message is raised when SDML is fitted
    with skggm's graphical lasso solver and that solver fails"""
    # TODO: remove if we don't need skggm anymore
    expected = ("There was a problem in SDML when using skggm's graphical "
                "lasso solver.")
    # toy case on which skggm's graphical lasso is known to fail
    # because it returns non finite values
    toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    labels = [1, -1]
    model = SDML(use_cov=False, balance_param=100, verbose=True)

    with pytest.raises(RuntimeError) as excinfo:
      model.fit(toy_pairs, labels)
    assert expected == str(excinfo.value)
Exemplo n.º 16
0
def sandwich_demo():
  """Plot the sandwich data in input space and under four learned metrics.

  The top row of the figure shows the raw data together with its
  2-nearest-neighbor graph.  The four panels below show the same data after
  fitting and transforming with LMNN, ITML, SDML and LSML, each with its own
  2-nearest-neighbor graph.  The figure is displayed with ``pyplot.show()``.
  """
  x, y = sandwich_data()
  knn = nearest_neighbors(x, k=2)
  ax = pyplot.subplot(3, 1, 1)  # take the whole top row
  plot_sandwich_data(x, y, ax)
  plot_neighborhood_graph(x, knn, y, ax)
  ax.set_title('input space')
  ax.set_aspect('equal')
  ax.set_xticks([])
  ax.set_yticks([])

  num_constraints = 60
  # Each entry pairs a learner with the positional arguments of its fit().
  mls = [
      (LMNN(), (x, y)),
      (ITML(), (x, ITML.prepare_constraints(y, len(x), num_constraints))),
      (SDML(), (x, SDML.prepare_constraints(y, len(x), num_constraints))),
      (LSML(), (x, LSML.prepare_constraints(y, num_constraints)))
  ]

  # Slots 3..6 of the 3x2 grid are the two rows under the top panel.
  # enumerate() is used instead of xrange(), which only exists on Python 2.
  for ax_num, (ml, args) in enumerate(mls, 3):
    ml.fit(*args)
    tx = ml.transform()
    ml_knn = nearest_neighbors(tx, k=2)
    ax = pyplot.subplot(3, 2, ax_num)
    plot_sandwich_data(tx, y, ax)
    plot_neighborhood_graph(tx, ml_knn, y, ax)
    ax.set_title('%s space' % ml.__class__.__name__)
    ax.set_xticks([])
    ax.set_yticks([])
  pyplot.show()
Exemplo n.º 17
0
 def test_sdml_raises_warning_non_psd(self):
   """Checks that SDML emits the non-PSD ConvergenceWarning on a toy example
   whose pseudo-covariance matrix is known not to be PSD"""
   expected = ("Warning, the input matrix of graphical lasso is not "
               "positive semi-definite (PSD). The algorithm may diverge, "
               "and lead to degenerate solutions. "
               "To prevent that, try to decrease the balance parameter "
               "`balance_param` and/or to set use_cov=False.")
   toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
   labels = [1, -1]
   model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
   with pytest.warns(ConvergenceWarning) as caught:
     try:
       model.fit(toy_pairs, labels)
     except Exception:
       # a failure of fit itself is deliberately ignored; only the warning
       # is under test
       pass
   # the expected text must match one of the warnings raised by the estimator
   messages = [str(w.message) for w in caught]
   assert expected in messages
Exemplo n.º 18
0
 def test_sdml_raises_warning_non_psd(self):
     """Checks that SDML emits the non-PSD ConvergenceWarning on a toy example
     whose pseudo-covariance matrix is known not to be PSD"""
     expected = ("Warning, the input matrix of graphical lasso is not "
                 "positive semi-definite (PSD). The algorithm may diverge, "
                 "and lead to degenerate solutions. "
                 "To prevent that, try to decrease the balance parameter "
                 "`balance_param` and/or to set use_cov=False.")
     toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
     labels = [1, -1]
     model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
     with pytest.warns(ConvergenceWarning) as caught:
         try:
             model.fit(toy_pairs, labels)
         except Exception:
             # a failure of fit itself is deliberately ignored; only the
             # warning is under test
             pass
     # the expected text must match one of the warnings raised by the
     # estimator
     messages = [str(w.message) for w in caught]
     assert expected in messages
Exemplo n.º 19
0
  def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Tests that the right error message is raised when SDML is fitted
    without skggm installed and scikit-learn's graphical lasso fails to
    converge"""
    # TODO: remove if we don't need skggm anymore
    expected = ("There was a problem in SDML when using scikit-learn's graphical "
                "lasso solver. skggm's graphical lasso can sometimes converge on "
                "non SPD cases where scikit-learn's graphical lasso fails to "
                "converge. Try to install skggm and rerun the algorithm (see "
                "the README.md for the right version of skggm).")
    # toy case on which scikit-learn's graphical lasso is known to fail
    # because it returns a non SPD matrix
    toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    labels = [1, -1]
    model = SDML(use_cov=False, balance_param=100, verbose=True)

    with pytest.raises(RuntimeError) as excinfo:
      model.fit(toy_pairs, labels)
    assert expected == str(excinfo.value)
Exemplo n.º 20
0
  def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Tests that the right error message is raised when SDML is fitted
    without skggm installed and scikit-learn's graphical lasso fails to
    converge"""
    # TODO: remove if we don't need skggm anymore
    expected = ("There was a problem in SDML when using scikit-learn's graphical "
                "lasso solver. skggm's graphical lasso can sometimes converge on "
                "non SPD cases where scikit-learn's graphical lasso fails to "
                "converge. Try to install skggm and rerun the algorithm (see "
                "the README.md for the right version of skggm).")
    # toy case on which scikit-learn's graphical lasso is known to fail
    # because it returns a non SPD matrix
    toy_pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    labels = [1, -1]
    model = SDML(prior='identity', balance_param=100, verbose=True)

    with pytest.raises(RuntimeError) as excinfo:
      model.fit(toy_pairs, labels)
    assert expected == str(excinfo.value)
Exemplo n.º 21
0
def metricLearning(data):
    """Learn an SDML metric from the labelled pairs and transform ``data``.

    Reads the pair labels from ``../TestAndLearn/data/outcome_labels.csv``,
    builds a symmetric connectivity matrix over the labelled row ids, fits
    SDML on the matching rows of ``data`` and returns the transformed data.

    Parameters
    ----------
    data
        Feature table indexed through ``data.iloc``; the labelled ids are used
        as positional row indices into it.

    Returns
    -------
    The result of ``metric.transform(data)`` for the fitted SDML model.
    """
    df_label = pd.read_csv('../TestAndLearn/data/outcome_labels.csv')

    # get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)
    # Position of every row id in rowIDLIst, so that the loop below does not
    # rescan the list for each label.
    row_pos = {rid: pos for pos, rid in enumerate(rowIDLIst)}

    # connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    # DataFrame.as_matrix() was removed in pandas 1.0; .values gives the same
    # array on old and new pandas versions.
    # NOTE(review): assumes the columns are ordered (id1, id2, label) - confirm.
    for lbl in df_label.values:
        i, j = row_pos[lbl[0]], row_pos[lbl[1]]
        # The graph is undirected, so both triangles get the label.
        cmatrix[i][j] = int(lbl[2])
        cmatrix[j][i] = int(lbl[2])

    print("cmatrix.shape", '\n', cmatrix.shape)

    # One single-row frame per labelled id, stacked into one array.
    trainedData = [data.iloc[[rid]] for rid in rowIDLIst]
    trainedData = pd.concat(trainedData, axis=0).values
    print("trainedData.shape", "\n", trainedData.shape)

    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(data)
    return newData
Exemplo n.º 22
0
 def test_raises_no_warning_installed_skggm(self):
     """Checks that fitting SDML and SDML_Supervised emits no warning.

     Warnings are recorded with ``warnings.catch_warnings(record=True)``
     because ``pytest.warns(None)`` is deprecated since pytest 7 and rejected
     by pytest 8.
     """
     import warnings  # local import keeps this block self-contained

     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
     y_pairs = [1, -1]
     X, y = make_classification(random_state=42)
     with warnings.catch_warnings(record=True) as record:
         # record every warning, including ones already emitted elsewhere
         warnings.simplefilter("always")
         sdml = SDML()
         sdml.fit(pairs, y_pairs)
     assert len(record) == 0
     with warnings.catch_warnings(record=True) as record:
         warnings.simplefilter("always")
         sdml = SDML_Supervised(use_cov=False, balance_param=1e-5)
         sdml.fit(X, y)
     assert len(record) == 0
Exemplo n.º 23
0
 def test_raises_no_warning_installed_skggm(self):
   """Checks that fitting SDML and SDML_Supervised emits no warning.

   Warnings are recorded with ``warnings.catch_warnings(record=True)``
   because ``pytest.warns(None)`` is deprecated since pytest 7 and rejected
   by pytest 8.
   """
   import warnings  # local import keeps this block self-contained

   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
   X, y = make_classification(random_state=42)
   with warnings.catch_warnings(record=True) as record:
     # record every warning, including ones already emitted elsewhere
     warnings.simplefilter("always")
     sdml = SDML()
     sdml.fit(pairs, y_pairs)
   assert len(record) == 0
   with warnings.catch_warnings(record=True) as record:
     warnings.simplefilter("always")
     sdml = SDML_Supervised(use_cov=False, balance_param=1e-5)
     sdml.fit(X, y)
   assert len(record) == 0
Exemplo n.º 24
0
def test_tiwafer():
    """Fit SDML on the data from ``load_data_sdml`` and pickle the results.

    Constraints are built with ``prepare_constraints_old`` from the labels.
    The learned metric is written to ``W_metric_sdml.p`` and the transformer
    to ``W_trans_sdml.p`` in the current working directory.

    The progress messages use single-argument ``print(...)`` calls, which are
    valid on both Python 2 and Python 3.
    """
    num_constraints = 1500
    print("Loading Data....")
    tiwafer_data = load_data_sdml()
    # sim_pairs / diff_pairs are only needed by the disabled alternative
    # constraint builder mentioned below.
    sim_pairs = tiwafer_data.sim_pairs
    diff_pairs = tiwafer_data.diff_pairs
    sorted_ids = tiwafer_data.sortedIds
    ti_data = np.array(tiwafer_data.data)
    labels = np.array(tiwafer_data.target)

    print("Done Loading Data.\nLearning Distance Metric....")

    num_points = len(sorted_ids)
    W = prepare_constraints_old(labels, num_points, num_constraints)

    sdml = SDML()
    # W = prepare_constraints(sorted_ids, sim_pairs, diff_pairs)

    sdml.fit(ti_data, W)
    W_metric = sdml.metric()
    # A with-block closes the file; the original left this handle open.
    with open('W_metric_sdml.p', 'wb') as handle:
        cPickle.dump(W_metric, handle)
    W_trans = sdml.transformer()
    with open('W_trans_sdml.p', 'wb') as handle:
        cPickle.dump(W_trans, handle)
Exemplo n.º 25
0
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


# Each list pairs an estimator instance with the function that builds its
# input dataset; the matching ``ids_*`` list holds the estimators' class
# names in the same order.
quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = [
    learner.__class__.__name__ for (learner, _) in quadruplets_learners]

pairs_learners = [
    (ITML(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (MMC(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (SDML(use_cov=False, balance_param=1e-5), build_pairs)
]
ids_pairs_learners = [
    learner.__class__.__name__ for (learner, _) in pairs_learners]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(use_cov=False, balance_param=1e-5),
     build_classification),
]
ids_classifiers = list(
Exemplo n.º 26
0
 def fit(self, X, y):
     """Build ``NUM_CONSTRAINTS`` constraints from the labels ``y`` with
     ``SDML.prepare_constraints`` and fit the parent class on them."""
     constraint_graph = SDML.prepare_constraints(y, len(X), NUM_CONSTRAINTS)
     return super(SDML_sk, self).fit(X, constraint_graph)
Exemplo n.º 27
0
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


# Each list pairs an estimator instance with the function that builds its
# input dataset; the matching ``ids_*`` list holds the estimators' class
# names in the same order.
quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = [
    learner.__class__.__name__ for (learner, _) in quadruplets_learners]

pairs_learners = [
    (ITML(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (MMC(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (SDML(prior='identity', balance_param=1e-5), build_pairs)
]
ids_pairs_learners = [
    learner.__class__.__name__ for (learner, _) in pairs_learners]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=5), build_classification),
    (SDML_Supervised(prior='identity', balance_param=1e-5),
     build_classification),
]
ids_classifiers = list(
Exemplo n.º 28
0
        #print ("lbl[0]",lbl[0])
        #print ("lbl[1]",lbl[1])
        #print ("lbl[2]",lbl[2])
        #print ("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),"rowIDLIst.index(lbl[1])",rowIDLIst.index(lbl[1]))
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])

    print "cmatrix.shape", '\n', cmatrix.shape

    trainedData = []

    for rid in rowIDLIst:
        row = df_reperent.iloc[[rid]]
        #print "row","\n",row
        #print "rowType","\n",type(row)
        trainedData.append(row)

    #print "LentrainedData","\n", len(trainedData)

    #print "typetrainedData1", '\n', len(trainedData)

    trainedData = pd.concat(trainedData, axis=0).as_matrix()
    print "trainedData.shape", "\n", trainedData.shape
    #print "trainedData2", "\n", trainedData

    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_reperent)
    print type(newData)
    print newData.shape
Exemplo n.º 29
0
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


# Each list pairs an estimator instance with the function that builds its
# input dataset; the matching ``ids_*`` list holds the estimators' class
# names in the same order.
quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = [
    learner.__class__.__name__ for (learner, _) in quadruplets_learners]

pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = [
    learner.__class__.__name__ for (learner, _) in pairs_learners]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(), build_classification),
]
ids_classifiers = list(
    map(lambda x: x.__class__.__name__,