def prediction(main_sym, add_sym, ton, pul):
    """Predict tags for one sample via spectral clustering against training data.

    The input symptoms are vectorized, appended after the first ``nsample``
    training vectors, and the combined set is spectrally clustered on a
    precomputed affinity matrix.  The tags whose counts reach at least 75%
    of the peak count within the query's cluster are returned.

    Parameters mirror ``cnn.create_vec``: main symptoms, additional
    symptoms, tongue and pulse features.

    Returns:
        list: names of the predicted tags (possibly empty).
    """
    ncluster = 10
    nsample = 500
    feature_dict, tag_dict = cnn.create_dict('./app/algo/data/')
    vec = cnn.create_vec(main_sym, add_sym, ton, pul, feature_dict)
    workbook = cnn.xlrd.open_workbook('./app/algo/data/data.xlsx')
    sheet = workbook.sheet_by_index(1)
    # Skip the header row; take the first five columns of each row.
    # Fetch each row once instead of once per column (the original called
    # sheet.row_values(i) inside the inner loop, re-reading the whole row).
    raw = [sheet.row_values(i)[:5] for i in range(1, sheet.nrows)]
    X, y = cnn.create_matrix(raw, feature_dict, tag_dict)
    X = X[0:nsample]
    X.append(vec)  # the query vector becomes the last sample
    X = construct_W(np.array(X))
    y_pred = SpectralClustering(n_clusters=ncluster, gamma=1,
                                affinity='precomputed',
                                n_jobs=1).fit_predict(X)
    # Tag vectors of the training samples sharing the query's cluster
    # (y_pred[-1] is the query's cluster label).
    cluster = [y[i] for i in range(nsample) if y_pred[i] == y_pred[-1]]
    count = np.sum(cluster, axis=0)
    thre = np.max(count) * 0.75
    # Invert tag_dict once (index -> name) instead of scanning
    # list(keys)/list(values) for every winning index.
    index_to_tag = {v: k for k, v in tag_dict.items()}
    return [index_to_tag[i] for i in range(len(count)) if count[i] >= thre]
def train():
    """Cluster the training set spectrally and report multi-label metrics.

    The first ``nsample`` samples are grouped into ``ncluster`` clusters.
    Each cluster is assigned the set of tags whose counts exceed 75% of
    the cluster's peak tag count; every sample then inherits its cluster's
    tag vector.  Hamming loss, one-error and coverage against the true
    tags are printed.
    """
    ncluster = 10
    nsample = 500
    feature_dict, tag_dict = cnn.create_dict('./data/')
    workbook = cnn.xlrd.open_workbook('data/data.xlsx')
    sheet = workbook.sheet_by_index(1)
    # Skip the header row; take the first five columns of each row.
    # Fetch each row once instead of once per column (the original called
    # sheet.row_values(i) inside the inner loop, re-reading the whole row).
    raw = [sheet.row_values(i)[:5] for i in range(1, sheet.nrows)]
    X, y = cnn.create_matrix(raw, feature_dict, tag_dict)
    X = construct_W(np.array(X[0:nsample]))
    y_pred = SpectralClustering(n_clusters=ncluster, gamma=1,
                                affinity='nearest_neighbors').fit_predict(X)
    # Bucket each sample's true tag vector under its predicted cluster.
    clusters = [[] for _ in range(ncluster)]
    for i in range(nsample):
        clusters[y_pred[i]].append(y[i])
    # Per cluster: mark the tags whose count exceeds 75% of the peak.
    tags = []
    for members in clusters:
        count = np.sum(members, axis=0)
        thre = np.max(count) * 0.75
        tags.append([1 if c > thre else 0 for c in count])
    # Every sample is predicted with its cluster's tag vector.
    y_ = [tags[y_pred[i]] for i in range(nsample)]
    h, o, c = cnn.assess(y_, y[0:nsample])
    print('Hamming Loss:' + str(h) + '\t One-Error:' + str(o) + '\t Coverage:' + str(c))
def prediction(main_sym, add_sym, ton, pul):
    """Predict a single tag for one sample with an AdaBoost classifier.

    The full dataset is re-read and the classifier re-trained on every
    call; the input symptoms are then vectorized and classified.

    Parameters mirror ``cnn.create_vec``: main symptoms, additional
    symptoms, tongue and pulse features.

    Returns:
        list: a one-element list containing the predicted tag name.
    """
    feature_dict, tag_dict = cnn.create_dict('./app/algo/data/')
    workbook = cnn.xlrd.open_workbook('./app/algo/data/data.xlsx')
    sheet = workbook.sheet_by_index(1)
    # Skip the header row; take the first five columns of each row.
    # Fetch each row once instead of once per column (the original called
    # sheet.row_values(i) inside the inner loop, re-reading the whole row).
    raw = [sheet.row_values(i)[:5] for i in range(1, sheet.nrows)]
    X, y_ = cnn.create_matrix(raw, feature_dict, tag_dict)
    # Collapse each one-hot tag vector to the index of its single 1.
    y = [row.index(1) for row in y_]
    classifier = AdaBoostClassifier()
    classifier.fit(X, y)
    # Reuse the dictionaries built above (the original reloaded them here
    # with an identical, redundant cnn.create_dict call).
    vec = cnn.create_vec(main_sym, add_sym, ton, pul, feature_dict)
    label = classifier.predict([vec])[0]
    # Reverse-lookup the tag name for the predicted index.
    return [list(tag_dict.keys())[list(tag_dict.values()).index(label)]]
def train():
    """Train and self-evaluate a RandomForest multi-label classifier.

    Fits on the whole dataset and evaluates on the same data (training
    performance only).  Hamming loss, one-error and coverage are printed.
    """
    feature_dict, tag_dict = cnn.create_dict('./data/')
    workbook = cnn.xlrd.open_workbook('data/data.xlsx')
    sheet = workbook.sheet_by_index(1)
    # Skip the header row; take the first five columns of each row.
    # Fetch each row once instead of once per column (the original called
    # sheet.row_values(i) inside the inner loop, re-reading the whole row).
    raw = [sheet.row_values(i)[:5] for i in range(1, sheet.nrows)]
    X, y = cnn.create_matrix(raw, feature_dict, tag_dict)
    classifier = RandomForestClassifier()
    classifier.fit(X, y)
    y_ = classifier.predict(X)
    h, o, c = cnn.assess(y_, y)
    print('Hamming Loss:' + str(h) + '\t One-Error:' + str(o) + '\t Coverage:' + str(c))
def train():
    """Train and self-evaluate an AdaBoost single-label classifier.

    One-hot tag vectors are collapsed to class indices, the model is fit
    and evaluated on the same data (training performance only), and
    accuracy plus micro/macro precision, recall and F1 are printed.
    """
    feature_dict, tag_dict = cnn.create_dict('./data/')
    workbook = cnn.xlrd.open_workbook('data/data.xlsx')
    sheet = workbook.sheet_by_index(1)
    # Skip the header row; take the first five columns of each row.
    # Fetch each row once instead of once per column (the original called
    # sheet.row_values(i) inside the inner loop, re-reading the whole row).
    raw = [sheet.row_values(i)[:5] for i in range(1, sheet.nrows)]
    X, y_ = cnn.create_matrix(raw, feature_dict, tag_dict)
    # Collapse each one-hot tag vector to the index of its single 1.
    y = [row.index(1) for row in y_]
    classifier = AdaBoostClassifier()
    classifier.fit(X, y)
    y_ = classifier.predict(X)
    # NOTE(review): bare `assess` here, while sibling train() functions use
    # cnn.assess — confirm a module-level assess is actually in scope.
    accuracy, micro_precise, micro_recall, micro_f1, macro_precise, macro_recall, macro_f1 = assess(
        y_, y)
    print('Accuracy:' + str(accuracy) + '\t Micro_Precise:' + str(micro_precise) +
          '\t Micro_Recall:' + str(micro_recall) + '\t Micro_F1:' + str(micro_f1) +
          '\t Macro_Precise:' + str(macro_precise) + '\t Macro_Recall:' + str(macro_recall) +
          '\t Macro_F1:' + str(macro_f1))