def create_cluster_members(labels, records, number_of_topics):
    """Group records into one ``Cluster`` per topic id.

    ``labels`` and ``records`` are walked in lockstep: the record at
    position ``j`` is added to the cluster whose topic id equals
    ``labels[j]``. Labels that match no id in ``range(number_of_topics)``
    (for example a negative "noise" label) are ignored.

    Args:
        labels: Sequence of hashable topic ids, one per record.
        records: Sequence of record objects; each record that ends up first
            in a cluster must expose an ``a_raw`` attribute.
        number_of_topics: Number of clusters to create; topic ids run from
            ``0`` to ``number_of_topics - 1``.

    Returns:
        A list of ``number_of_topics`` ``Cluster`` objects, indexed by topic
        id. A cluster's ``title`` is set to the ``a_raw`` of the first record
        returned by ``get_records()``. A topic with no records keeps whatever
        title the ``Cluster("")`` constructor gave it instead of raising
        ``IndexError``.
    """
    # Bucket the records by label in one pass rather than rescanning
    # ``labels`` once per topic. Dict lookup uses the same equality as the
    # ``label == topic`` comparison for hashable labels.
    members_by_label = {}
    for label, record in zip(labels, records):
        members_by_label.setdefault(label, []).append(record)

    clusters = []
    for topic in range(number_of_topics):
        cluster = Cluster("")
        members = members_by_label.get(topic, [])
        for record in members:
            cluster.add_record(record)
        # An empty topic has no first record to take a title from.
        if members:
            cluster.title = cluster.get_records()[0].a_raw
        clusters.append(cluster)
    return clusters
def incremental(train_records, num_clusters):
    """Cluster the training records and wrap each group in a ``Cluster``.

    Side effects: rebinds the module globals ``sw`` (stop words loaded from
    ``Config.stop_words_address``) and ``answers_train`` (the ``a_pre`` of
    every training record), and calls ``lucene.initVM`` with the headless
    AWT flag. Both globals are set before ``do_cluster`` runs; presumably
    ``do_cluster`` reads them -- confirm against its definition.

    Args:
        train_records: Indexable collection of records exposing ``a_pre``.
        num_clusters: Not used by this function; kept for the callers.

    Returns:
        A list of ``Cluster`` objects, one per group returned by
        ``do_cluster(Config.threshold)`` that has more than one member.
        Each cluster is constructed from the record at the group's first
        index and then receives every record of the group, in order.
    """
    global sw, answers_train

    sw = load_stop_words(Config.stop_words_address)
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    answers_train = [record.a_pre for record in train_records]

    # The second value returned by do_cluster is not needed here.
    groups, _ = do_cluster(Config.threshold)

    built = []
    for indices in groups:
        # Groups with a single member (or none) are dropped.
        if len(indices) <= 1:
            continue
        cluster = Cluster(train_records[indices[0]])
        for index in indices:
            cluster.add_record(train_records[index])
        built.append(cluster)
    return built