Example #1
import io
import requests

import silhouette  # the module these examples demonstrate


def main():
    # code_sources (language, URL) pairs, make_cell, PAGE, CSS, and
    # JAVASCRIPT are defined elsewhere in the script.
    files = []
    for lang, addr in code_sources:
        data = requests.get(addr).text
        # Keep only the first 1000 lines of each fetched source file.
        contents = io.StringIO("\n".join(data.split('\n')[:1000]))
        outline = ''.join(
            silhouette.silhouette(contents, " ",
                                  "<span style='background: black;'>",
                                  "</span>"))
        files.append((addr, lang, outline))

    # Build one table cell per file and drop them into the page template.
    cells = [make_cell(f) for f in files]
    table = '<div class="lfbtable">{}\n</div>'.format('\n'.join(cells))
    print(PAGE.format(CSS, table, "", JAVASCRIPT))
Example #2
import sys
import warnings
import cPickle  # Python 2 snippet: legacy pickle and scikit-learn APIs

import numpy as np
from sklearn import (cross_validation, decomposition, lda, naive_bayes,
                     neighbors, pipeline, qda)
from sklearn.preprocessing import scale

import silhouette

# Silence library warnings so the scores print cleanly.
warnings.simplefilter("ignore")

# db maps sample id -> [label, feature_1, ..., feature_n]
db = cPickle.load(open(sys.argv[1], 'rb'))
Y = np.array([db[i][0] for i in db.keys()]).astype(int)
X = np.array([db[i][1:] for i in db.keys()])

# Class priors: fraction of samples in each class 1..Y.max().
s = float(Y.shape[0])
priors = np.array([float(np.where(Y == i)[0].shape[0]) / s
                   for i in range(1, Y.max() + 1)])

classifiers = ['nb','knn','lda','qda']


# One PCA(6) + classifier pipeline per method.
clf = [
    pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                       ('nb', naive_bayes.GaussianNB())]),
    pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                       ('knn', neighbors.KNeighborsClassifier(n_neighbors=1))]),
    pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                       ('lda', lda.LDA())]),
    pipeline.Pipeline([('pca', decomposition.PCA(n_components=6, whiten=False)),
                       ('qda', qda.QDA())]),
]

# Median silhouette of the raw features (labels shifted to start at 0).
s = silhouette.silhouette(X, Y - 1)
print np.median(s)

# 10-fold cross-validated weighted F1 score for each pipeline.
it = cross_validation.KFold(Y.size, n_folds=10)
for c, cn in zip(clf, classifiers):
    res = cross_validation.cross_val_score(c, scale(X), Y, cv=it, scoring="f1_weighted")
    print cn + ': ', res.mean(), res.std()
  
Example #3
# Assumed context: arcpy, numpy (as np), and the helpers plusplus
# (k-means++ seeding), k_means, and silhouette are imported earlier in
# the tool; input_points, k_optimized, and k are tool parameters.

# Organize data as X/Y coordinates.
x = arcpy.da.TableToNumPyArray(input_points, "SHAPE@X").astype(float)
y = arcpy.da.TableToNumPyArray(input_points, "SHAPE@Y").astype(float)
X = np.array(list(zip(x, y)))

# Optionally search K = 2..k_max-1 for the best average silhouette.
if str(k_optimized) == 'true':
    try:
        k_max = 20
        Silhouettes = []
        K = 2
        while K < k_max:
            arcpy.AddMessage("Testing K = " + str(K))
            Centroids = plusplus(X, K)
            Cxy, points, clusters = k_means(X, K, Centroids)
            average, sils = silhouette(points)
            Silhouettes.append(average)
            K += 1
        arcpy.AddMessage("\nAverage Silhouette Values:\n")
        arcpy.AddMessage(str(Silhouettes))
        
    except Exception as e:
        exc_tb = sys.exc_info()[2]  # traceback, for the failing line number
        arcpy.AddError('\n'
                       + "Error Optimizing K: \n\n\t" + "In line "
                       + str(exc_tb.tb_lineno) + ": " + str(e) + "\n")
else:
    # Guard against an invalid K input.
    if k > len(X):
        arcpy.AddError("ERROR: K must be less than or equal to the number of points")
        quit()
Example #4
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload,
               dropped_class_numbers):
    # Reduce with kernel PCA, cluster the embedding, and sweep gamma over
    # params + 1 steps, keeping the setting with the best silhouette score.
    # method is assumed to be one of Kmeans2D, Kmeans1D, or Thresholding.
    if method == "Kmeans2D":
        components = 2
    elif method in ("Kmeans1D", "Thresholding"):
        components = 1
        flag = 0  # compile the C thresholding helper only on first use
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    max_sc = -100.0   # best silhouette coefficient seen so far
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        for i in range(0, params + 1):
            # Project the data with RBF kernel PCA at the current gamma and
            # dump the embedding to CSV for the external clustering programs.
            transformer = KernelPCA(n_components=components,
                                    kernel='rbf',
                                    gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df
            gc.collect()
            if method == "Thresholding":
                # Build the C thresholding helper once, then run it on the
                # saved embedding, timing the external run.
                if flag == 0:
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = end - start
                sc = silhouette.silhouette(KPCA_output_path,
                                           Thresholding_paths[1])
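                # Match predicted clusters to ground-truth classes (Hungarian
                # assignment) and compute cluster purity.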
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time,
                                   gamma, sc, purity)
                #print(i+1,thresholding_time,gamma,sc,purity)
                # Track the best gamma by silhouette score during the sweep.
                if i < params and sc > max_sc:
                    max_sc = sc
                    best_gamma = gamma
                    best_purity = purity
                    serial_num = i + 1
                # Before the extra final pass, rerun at the best gamma found.
                if i == params - 1:
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                # After the final pass, log and persist the winning setting.
                if i == params:
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       thresholding_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            thresholding_time, best_gamma,
                                            max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            else:
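                # K-means branch: cluster the saved embedding, attach the
                # ground-truth distribution, and score it the same way.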
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1],
                                            clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1],
                                                KMeans_paths[0],
                                                datafiles_names[0],
                                                datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
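                # Hungarian matching of clusters to classes, as above.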
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma,
                                   sc, purity)
                #print(i+1,kmeans_time,gamma,sc,purity)
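                # Same best-gamma bookkeeping as the thresholding branch.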
                if i < params and sc > max_sc:
                    max_sc = sc
                    best_gamma = gamma
                    best_purity = purity
                    serial_num = i + 1
                if i == params - 1:
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                if i == params:
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       kmeans_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            kmeans_time, best_gamma, max_sc,
                                            best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
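            # Advance gamma for the next sweep step; the final pass reruns
            # at the best gamma instead.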
            if i < params - 1:
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()
Example #5
# Python 2 snippet; cPickle, numpy, scale, MDS, pdist, silhouette, desc
# (bend-energy descriptors), the variables path and sigma, and the file
# handle f all come from the truncated top of the script.
with open(path + "names.pkl", "r") as g:
    cl = cPickle.load(f)      # class label per image file
    nomes = cPickle.load(g)   # list of image file names

db = {}

# Build a bend-energy feature vector per image: [label, log energies...].
for im_file in nomes:
    nmbe = desc.bendenergy(path + im_file, sigma)
    db[im_file] = numpy.hstack((cl[im_file], numpy.log(nmbe())))

# Assemble the data matrix: one row per figure, label in column 0.
data1 = numpy.array([db[i] for i in db.keys()])

Y = data1[:, 0].astype(int)
X1 = scale(data1[:, 1:])
s = silhouette.silhouette(X1, Y - 1)
print numpy.median(numpy.abs(1. - s))

# Embed the features in the plane with metric MDS (Isomap left for reference).
#iso = Isomap(n_neighbors=98, max_iter=2500)
mds = MDS(n_init=20, dissimilarity='euclidean', max_iter=2500)
#X1 = iso.fit_transform(data1[:, 1:])
X1 = mds.fit_transform(data1[:, 1:])

# R^2-style goodness of fit between original and embedded pairwise distances.
r = ((pdist(data1[:, 1:]) - pdist(X1))**2).sum()
s = ((pdist(X1) - pdist(X1).mean())**2).sum()
R2 = 1 - r/s
print R2
data = numpy.vstack((Y,X1.transpose())).transpose()

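# Re-key the label + embedded coordinates by figure name.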
db = dict(zip(db.keys(),data))