def predict():
    """Predict topic ids for the documents sent in a POST request.

    The request body must be a JSON object whose ``docs`` field is a
    non-empty list of strings.

    :return: ``({'topics': [...]}, 200)`` on success, otherwise an error
        message paired with status code 400.
    """
    payload = request.json
    if not isinstance(payload, dict):
        return 'Not json format', 400
    if "docs" not in payload:
        return 'field `docs` not present', 400

    documents = payload['docs']
    if not (isinstance(documents, list) and documents):
        return '`docs` should be list and not empty', 400
    if not all(isinstance(item, str) for item in documents):
        return 'Wrong data type of doc', 400

    # The prediction result is converted with .tolist() before being
    # placed in the response body.
    return {'topics': kmeans.predict(documents).tolist()}, 200
def process(selectedCrime, type):
    """Build per-year/month/cluster-count point data and write it to a JS file.

    For every year in 2005-2016, month in 1-12 and cluster count in 2-10,
    this fetches geodesic and euclidean clusters via ``get_clusters``,
    assigns each coordinate to a cluster of both kinds via ``predict``, and
    stores a ``FeatureCollection`` of points plus one ``build_cluster_data``
    result per geodesic cluster. The nested dictionary is serialised with
    ``json.dumps`` and written to
    ``../static/<selectedCrime>/<type>_final_points.js``.

    :param selectedCrime: value forwarded to ``get_clusters`` and
        ``build_cluster_data``; also used as the output sub-directory name.
    :param type: prefix of the output file name. The name shadows the
        builtin but is kept so existing keyword callers keep working.
    """
    colors = [
        'red', 'orange', 'yellow', 'green', 'blue',
        'purple', 'black', 'white', 'cyan', 'brown',
    ]
    finalData = {}

    for year in range(2005, 2017):
        print(year)
        year_data = finalData[str(year)] = {}

        for month in range(1, 13):
            print(' ' + str(month))
            month_data = year_data[str(month)] = {}

            for num_clusters in range(2, 11):
                print(' ' + str(num_clusters))
                points = {"type": "FeatureCollection", "features": []}
                bucket = month_data[str(num_clusters)] = {}
                cluster_points = [[] for _ in range(num_clusters)]

                # Only the coordinates of the geodesic call are used; the
                # remaining return values are ignored, as in the original.
                geodesic_clusters, coordinate_array, _ = get_clusters(
                    selectedCrime=selectedCrime, type='geodesic',
                    month=month, year=year, num_clusters=num_clusters)
                euclidean_clusters, _, _ = get_clusters(
                    selectedCrime=selectedCrime, type='euclidean',
                    month=month, year=year, num_clusters=num_clusters)

                for coor in coordinate_array:
                    first, second = float(coor[0]), float(coor[1])
                    c_geodesic = predict(
                        num_clusters, geodesic_clusters, [first, second],
                        type='geodesic')
                    c_euclidean = predict(
                        num_clusters, euclidean_clusters, [first, second],
                        type='euclidean')

                    # Output coordinates are stored in swapped order
                    # (presumably lat/lon -> lon/lat; confirm with the
                    # consumer of the generated file).
                    cluster_points[int(c_geodesic)].append([second, first])
                    points["features"].append({
                        "geometry": {
                            "type": "Point",
                            "coordinates": [second, first],
                        },
                        "type": "Feature",
                        "properties": {
                            "fillColor": colors[c_euclidean],
                            "euclidean_cluster": c_euclidean,
                            "geodesic_cluster": c_geodesic,
                            "popupContent": "",
                        },
                    })

                bucket['points'] = points

                for cluster_id in range(num_clusters):
                    # Best effort: try twice, then skip this cluster.
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    for _attempt in range(2):
                        try:
                            bucket[str(cluster_id)] = build_cluster_data(
                                cluster_points[cluster_id],
                                selected_crime=selectedCrime,
                                cluster=cluster_id)
                        except Exception:
                            continue
                        break
                    else:
                        print('continuing')

    out_path = '../static/' + selectedCrime + '/' + type + '_final_points.js'
    # The context manager closes the file even if serialisation or a write
    # fails part-way through.
    # NOTE(review): the JS variable is always named `geodesic_data`,
    # regardless of `type` -- confirm this is intended.
    with open(out_path, 'w') as fn:
        fn.write('var geodesic_data = ')
        fn.write(json.dumps(finalData))
        fn.write(';\n')
from sklearn.datasets import load_digits
from sklearn.metrics import fowlkes_mallows_score
from sklearn.cluster import AgglomerativeClustering, AffinityPropagation
from kmeans import kmeans, Point, predict

data, target = load_digits(return_X_y=True)

# K-Means
kmeans_data = [Point(val) for val in data]
k_means = kmeans(kmeans_data, 10)

# Cluster index predicted for every sample, in the order of `data`.
labels = [predict(k_means, Point(point)) for point in data]
target = [int(num) for num in target]

# results[t][c] counts the samples with target t that were assigned to
# cluster c.
results = [[0] * 10 for _ in range(10)]
for true_digit, cluster_id in zip(target, labels):
    results[true_digit][cluster_id] += 1

# Map each target to the cluster that received most of its samples; on a
# tie the lowest cluster index wins.
conversion = {
    digit: max(range(len(counts)), key=counts.__getitem__)
    for digit, counts in enumerate(results)
}
print "source data's length: ", len(tranSourceFeature) #experiment 1 testData, testLabel = readTestData(targetPath) print "target outlier length: ", len(testLabel) ori_testData = testData K = 5 transferMatrix = np.array(transferMatrix).T testData = np.array(testData).T outlierTest = dot(transferMatrix, testData) outlierTest = outlierTest.T outlierTest.tolist() sigma = 0.0005 kernelTranSourceFeature = kernel(tranSourceFeature, sigma) centroids, clusterAssment, radiusCluster = kmeansAlgorithm( mat(kernelTranSourceFeature), K) numSamples = len(data) # clusterAssment = mat(zeros((numSamples, 1))) # for i in xrange(numSamples): # clusterAssment[i, :] = predict_label[i] purity = calcPurity(clusterAssment, K, sourceLabel) print "purity", purity sumPurity = 0.0 for i in purity: sumPurity += i print "avg purity", sumPurity / len(purity) predict(mat(kernel(tranTargetFeature, sigma)), centroids, K, radiusCluster)
# Pulses, E, PVI prefix = os.environ["INFUSION_DATASETS"] data = torch.load(f'{prefix}/pulses_E_PVI.pt') # Clustering km = torch.load("_kmeans/km.pt") # Model st = torch.load('st/out64/mon3') model.load_state_dict(st) model.train(False) #--- Distribution of E within cluster --- x = data["pulses"].view([-1, 128]) y = model(x) c = kmeans.predict(km, y) E = (data["E"].unsqueeze(1).repeat(1, 256).view([-1])) df = pd.DataFrame({"E": E.numpy(), "C": c.numpy()}) pivot = df.pivot(columns="C", values="E") def seriesE(i): return pivot[i][pivot[i].notnull()] ax = sns.catplot(y="E", col="C", col_wrap=8, kind="violin",