Example #1
import os

import numpy as np
import pandas as pd
from django.conf import settings
from django.shortcuts import render
from metric_learn import SDML
# clusteringAndTSNE is a project-local helper defined elsewhere in this app


def update(request):
    # Django view: builds a pairwise connectivity matrix from labelled pairs,
    # fits SDML, transforms the feature matrix, and re-renders the clustering
    # visualisation page

    df_label = pd.read_csv(
        os.path.join(settings.BASE_DIR, 'data/outcome_labels.csv'))
    print("df_label", '\n', df_label)

    df_data = pd.read_csv(
        os.path.join(settings.BASE_DIR, 'data/features_rep.csv'))
    #print("df_data", df_data)

    #get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    #rowIDLIst2 = pd.concat([df_label.id1,df_label.id2],axis = 1).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)
    #print("rowIDLIst2",'\n', rowIDLIst2)

    #connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    print("as_Matrix", '\n', df_label.as_matrix)
    for lbl in df_label.as_matrix():
        print("lbl", lbl)
        print("lbl[0]", lbl[0])
        print("lbl[1]", lbl[1])
        print("lbl[2]", lbl[2])
        print("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),
              "rowIDLIst.index(lbl[1])", rowIDLIst.index(lbl[1]))
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])

    print("cmatrix", '\n', cmatrix)

    trainedData = []

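    # collect the feature rows that appear in at least one labelled pair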
    for rid in rowIDLIst:
        row = df_data.iloc[[rid]]
        trainedData.append(row)

    print("trainedData1", '\n', trainedData)

    # .as_matrix() is the pre-1.0 pandas API; on newer pandas use .to_numpy()
    trainedData = pd.concat(trainedData, axis=0).as_matrix()

    print("trainedData2" "\n", trainedData)

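    # SDML learns a Mahalanobis metric from the constrained rows and their
    # connectivity matrix; transform() projects the full feature set into the
    # learned space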
    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_data)

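    # clustering algorithm and cluster count chosen in earlier steps are kept
    # in the Django session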
    al_selection = request.session['clustering']
    num_clustering = request.session['num_cluster']

    clusteringAndTSNE(newData, al_selection, num_clustering)
    # template context for the clustering visualisation page
    content = {'Title': "Step 7: Clustering Visualization", "listId": "li7"}
    return render(request, 'clustering/stp7-clu-visualisation.html', content)
Example #2
def update():
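    # standalone variant of Example #1's update(): same pipeline, but with
    # relative CSV paths and no Django request/session handling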

    df_label = pd.read_csv('data/outcome_labels.csv')
    print "df_label", '\n', df_label

    df_data = pd.read_csv('data/features_rep.csv')
    #df_data = pd.read_csv('data/alvin_rep.csv')
    print "df_data", '\n', df_data

    #print("df_data", df_data)

    #get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    #rowIDLIst2 = pd.concat([df_label.id1,df_label.id2],axis = 1)
    print "rowIDLIst", '\n', rowIDLIst
    #print("rowIDLIst2",'\n', rowIDLIst2)

    #connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    #print("as_Matrix", '\n', df_label.as_matrix)
    for lbl in df_label.as_matrix():
        #print ("lbl",lbl)
        #print ("lbl[0]",lbl[0])
        #print ("lbl[1]",lbl[1])
        #print ("lbl[2]",lbl[2])
        #print ("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),"rowIDLIst.index(lbl[1])",rowIDLIst.index(lbl[1]))
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])

    print "cmatrixShape", '\n', cmatrix.shape

    trainedData = []

    for rid in rowIDLIst:
        row = df_data.iloc[[rid]]
        #print "row","\n",row
        #print "rowType","\n",type(row)
        trainedData.append(row)
        #print "trainedData","\n", trainedData

    #print "typetrainedData1", '\n', len(trainedData)

    trainedData = pd.concat(trainedData, axis=0).as_matrix()

    #print "trainedData2", "\n", trainedData
    print "trainedData.shape", '\n', trainedData.shape
    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_data)

    clusteringAndTSNE(newData)
Example #3
  def test_iris(self):
    num_constraints = 1500

    n = self.iris_points.shape[0]
    np.random.seed(1234)
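    # prepare_constraints samples random pairwise constraints from the class
    # labels and packs them into the connectivity matrix W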
    W = SDML.prepare_constraints(self.iris_labels, n, num_constraints)

    # Test sparse graph inputs.
    for graph in ((W, scipy.sparse.csr_matrix(W))):
      sdml = SDML().fit(self.iris_points, graph)
      csep = class_separation(sdml.transform(), self.iris_labels)
      self.assertLess(csep, 0.25)
Example #4
  def test_iris(self):
    num_constraints = 1500

    n = self.iris_points.shape[0]
    # Note: this is a flaky test, which fails for certain seeds.
    # TODO: un-flake it!
    np.random.seed(5555)
    W = SDML.prepare_constraints(self.iris_labels, n, num_constraints)

    # Test sparse graph inputs.
    for graph in ((W, scipy.sparse.csr_matrix(W))):
      sdml = SDML().fit(self.iris_points, graph)
      csep = class_separation(sdml.transform(), self.iris_labels)
      self.assertLess(csep, 0.25)
Example #5
def metricLearning(data):
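    # learns an SDML metric from the labelled pairs in outcome_labels.csv and
    # returns `data` projected into the learned metric space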
    df_label = pd.read_csv('../TestAndLearn/data/outcome_labels.csv')
    #print("df_label", '\n', df_label)

    #get unique row ids
    rowIDLIst = pd.concat([df_label.id1, df_label.id2],
                          axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)

    #connectivity graph
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])

    #print("as_Matrix", '\n', df_label.as_matrix)
    for lbl in df_label.as_matrix():

        #print ("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),"rowIDLIst.index(lbl[1])",rowIDLIst.index(lbl[1]))
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])

    print("cmatrix.shape", '\n', cmatrix.shape)

    trainedData = []

    for rid in rowIDLIst:
        row = data.iloc[[rid]]
        #print "row","\n",row
        #print "rowType","\n",type(row)
        trainedData.append(row)

    #print "LentrainedData","\n", len(trainedData)

    #print "typetrainedData1", '\n', len(trainedData)

    trainedData = pd.concat(trainedData, axis=0).as_matrix()
    print("trainedData.shape", "\n", trainedData.shape)
    #print "trainedData2", "\n", trainedData

    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(data)
    return newData
Example #6
        #print ("lbl[0]",lbl[0])
        #print ("lbl[1]",lbl[1])
        #print ("lbl[2]",lbl[2])
        #print ("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),"rowIDLIst.index(lbl[1])",rowIDLIst.index(lbl[1]))
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])

    print "cmatrix.shape", '\n', cmatrix.shape

    trainedData = []

    for rid in rowIDLIst:
        row = df_reperent.iloc[[rid]]
        #print "row","\n",row
        #print "rowType","\n",type(row)
        trainedData.append(row)

    #print "LentrainedData","\n", len(trainedData)

    #print "typetrainedData1", '\n', len(trainedData)

    trainedData = pd.concat(trainedData, axis=0).as_matrix()
    print "trainedData.shape", "\n", trainedData.shape
    #print "trainedData2", "\n", trainedData

    metric = SDML().fit(trainedData, cmatrix)

    newData = metric.transform(df_reperent)
    print(type(newData))
    print(newData.shape)