def main():
    """Run one clustering test chosen on the command line and open its report.

    Command-line arguments:
        sys.argv[1] -- path of the document database file.
        sys.argv[2] -- algorithm name: 'kmeans', 'tree', 'som' or 'dbscan'.

    The database is converted to ``../result/<stem>_orange_fmt.tab`` unless
    that file already exists, the table is loaded, and the selected test
    routine is called with the data, the labels loaded from the database and
    the path ``../result/<algor>_cluster_result.txt``. That result path is
    then opened in emacs.

    Raises:
        NotImplementedError: if the algorithm name is not supported.
    """
    import sys

    dbfile = sys.argv[1]
    algor = sys.argv[2]

    # Resolve the test routine first so an unsupported name fails before any
    # conversion or loading work is done.  Only the selected name is looked
    # up, exactly as in an if/elif call chain.
    if algor == 'kmeans':
        run_test = kmeans_cluster_test
    elif algor == 'tree':
        run_test = tree_cluster_test
    elif algor == 'som':
        run_test = som_cluster_test
    elif algor == 'dbscan':
        run_test = dbscan_cluster_test
    else:
        raise NotImplementedError('unsupported algorithm: %r' % algor)

    # Derive the file stem with os.path: slicing between the last '/' and the
    # last '.' raised ValueError for a bare file name and produced an empty
    # stem when the file name itself had no extension.
    stem = os.path.splitext(os.path.basename(dbfile))[0]
    outfile = '../result/%s_orange_fmt.tab' % stem

    real_labels = cmpcluster.load_doc_labels(dbfile)
    if not os.path.exists(outfile):
        # The converted table is reused on later runs.
        transfer_data_file(dbfile, outfile)

    res_outfile = '../result/%s_cluster_result.txt' % algor
    data = load_data_to_array(outfile)
    run_test(data, real_labels, res_outfile)

    # algor was validated above, so the shell command contains no free text.
    os.system('emacs ' + res_outfile)
def main():
    """Run one clustering test chosen on the command line and open its report.

    Command-line arguments:
        sys.argv[1] -- path of the document database file.
        sys.argv[2] -- algorithm name: 'kmeans', 'tree', 'som' or 'dbscan'.

    The database is converted to ``../result/<stem>_orange_fmt.tab`` unless
    that file already exists, the table is loaded, and the selected test
    routine is called with the data, the labels loaded from the database and
    the path ``../result/<algor>_cluster_result.txt``. That result path is
    then opened in emacs.

    Raises:
        NotImplementedError: if the algorithm name is not supported.
    """
    import sys

    dbfile = sys.argv[1]
    algor = sys.argv[2]

    # Pick the test routine up front: an unsupported name is rejected before
    # the (potentially slow) conversion and loading steps.  Only the chosen
    # routine's name is evaluated.
    if algor == 'kmeans':
        run_test = kmeans_cluster_test
    elif algor == 'tree':
        run_test = tree_cluster_test
    elif algor == 'som':
        run_test = som_cluster_test
    elif algor == 'dbscan':
        run_test = dbscan_cluster_test
    else:
        raise NotImplementedError('unsupported algorithm: %r' % algor)

    # basename/splitext copes with paths that have no directory component or
    # no extension; the former rindex('/') / rindex('.') slicing raised
    # ValueError or picked up a dot from the directory part in those cases.
    stem = os.path.splitext(os.path.basename(dbfile))[0]
    outfile = '../result/%s_orange_fmt.tab' % stem

    real_labels = cmpcluster.load_doc_labels(dbfile)
    if not os.path.exists(outfile):
        # Convert only once; an existing table is reused.
        transfer_data_file(dbfile, outfile)

    res_outfile = '../result/%s_cluster_result.txt' % algor
    data = load_data_to_array(outfile)
    run_test(data, real_labels, res_outfile)

    # algor is one of four fixed words here, so the command line is safe.
    os.system('emacs ' + res_outfile)
def classify(dbfile, run_num, log_info=None):
    """Build communities ``run_num`` times and summarise the comparison scores.

    Args:
        dbfile: database path passed to ``cmpcluster.load_doc_labels`` and
            to ``CommunityBuilder``.
        run_num: number of times ``build()`` is invoked on the builder.
        log_info: forwarded unchanged to ``CommunityBuilder``.

    Returns:
        The ``(mean, std)`` pair obtained from ``mean_std`` over the
        ``cmp_cluster`` result of every run.
    """
    reference_labels = cmpcluster.load_doc_labels(dbfile)
    print('sample_test %d' % run_num)

    builder = CommunityBuilder(dbfile, log_info)
    # One score per run; every run compares a fresh build against the same
    # reference labels.
    scores = [
        cmp_cluster(builder.build(), reference_labels)
        for _ in range(run_num)
    ]

    average, deviation = mean_std(scores)
    return (average, deviation)
def classify(dbfile, run_num, log_info=None):
    """Repeat the community build and report mean/std of the comparison.

    Args:
        dbfile: database path used both to load the reference labels and to
            construct the ``CommunityBuilder``.
        run_num: how many builds to perform.
        log_info: passed through to ``CommunityBuilder`` as its second
            argument.

    Returns:
        A 2-tuple ``(mean, std)`` as unpacked from ``mean_std``.
    """
    expected = cmpcluster.load_doc_labels(dbfile)
    print('sample_test %d' % run_num)
    community_builder = CommunityBuilder(dbfile, log_info)

    def _single_run():
        # Build once and compare the outcome with the reference labels.
        outcome = community_builder.build()
        return cmp_cluster(outcome, expected)

    per_run = [_single_run() for _ in range(run_num)]
    mean_value, std_value = mean_std(per_run)
    return (mean_value, std_value)
def main():
    """Cluster the sougou database and log the mean and std of the metrics.

    Steps, in order:
      1. Open ``../result/sougou.log`` and write the result header.
      2. If ``../data/sougou.db`` does not exist, load the topic list into it
         and run the keyword extraction passes.
      3. Build communities (``max_depth=5``, ``min_doc_num=20``) once and
         compare the result with the labels loaded from the database.
      4. Write one line with the mean and one with the std to the log, close
         it, and open it in emacs.
    """
    import pretext
    import extract_keyword2
    # Not referenced below; kept because importing may have side effects.
    # NOTE(review): confirm whether this import is still needed.
    import worddf

    dbfile = '../data/sougou.db'
    logsteadyfile = '../result/sougou.log'
    row_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
    run_count = 1

    # The with-block guarantees the log is flushed and closed even when one
    # of the steps below raises; previously the handle leaked in that case.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # First run: create the database and extract its keywords.
            pretext.load_topiclist(
                dbfile, '/home/cs/download/cluster_data/sougou')
            eva = WordWeightEvaluation(30, '../data/worddf')
            ke = extract_keyword2.DBKeywordExtractor(dbfile, eva)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        real = cmpcluster.load_doc_labels(dbfile)
        # NOTE(review): the banner says 'fudan' although the data set is
        # sougou -- confirm whether that is intended.
        print('fudan')

        metrics = []
        for i in range(run_count):
            print('Time %d' % (i + 1))
            predicted = cb.build(max_depth=5, min_doc_num=20)
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        # The format consumes five values from each of mean and std.
        steadyfile.write(row_format % tuple(mean))
        steadyfile.write(row_format % tuple(std))

    # The log is closed at this point, so the editor sees the full content.
    os.system('emacs ' + logsteadyfile)
def main():
    """Run the community build 50 times and log the mean and std of the metrics.

    Steps, in order:
      1. Open ``../result/steady_test.log`` and write the result header.
      2. If ``../data/steady_test.db`` does not exist, load the topic list
         from ``../data/topicgj`` into it and run the keyword extraction
         passes.
      3. Build communities 50 times, comparing each result with the labels
         loaded from the database.
      4. Write one line with the mean and one with the std to the log, close
         it, and open it in emacs.
    """
    import preproc_qqtopic
    import extract_keyword2
    # Not referenced below; kept because importing may have side effects.
    # NOTE(review): confirm whether this import is still needed.
    import worddf

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'
    row_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
    run_count = 50

    # The with-block guarantees the log is flushed and closed even when one
    # of the steps below raises; previously the handle leaked in that case.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # First run: create the database and extract its keywords.
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')
            ke = extract_keyword2.DBKeywordExtractor(dbfile)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        real = cmpcluster.load_doc_labels(dbfile)
        print('steady_test')

        metrics = []
        for i in range(run_count):
            print('Time %d' % (i + 1))
            predicted = cb.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        # The format consumes five values from each of mean and std.
        steadyfile.write(row_format % tuple(mean))
        steadyfile.write(row_format % tuple(std))

    # The log is closed at this point, so the editor sees the full content.
    os.system('emacs ' + logsteadyfile)
def main():
    """Cluster the sougou database and record the metric mean and std.

    The log ``../result/sougou.log`` receives the result header followed by
    one line for the mean and one for the std of the comparison metrics.
    When ``../data/sougou.db`` is missing it is created from the topic list
    and the keyword extraction passes are run before clustering.  After the
    log has been closed it is opened in emacs.
    """
    import pretext
    import extract_keyword2
    # Unused in this function; retained in case the import has side effects.
    # NOTE(review): confirm whether it can be dropped.
    import worddf

    dbfile = '../data/sougou.db'
    logsteadyfile = '../result/sougou.log'
    line_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
    repetitions = 1

    # Use a context manager so the log file is closed on every exit path;
    # an exception in any step used to leave the handle open.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # Database not built yet: load the corpus and extract keywords.
            pretext.load_topiclist(
                dbfile, '/home/cs/download/cluster_data/sougou')
            eva = WordWeightEvaluation(30, '../data/worddf')
            ke = extract_keyword2.DBKeywordExtractor(dbfile, eva)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        real = cmpcluster.load_doc_labels(dbfile)
        # NOTE(review): prints 'fudan' while working on the sougou data --
        # confirm that this banner is intended.
        print('fudan')

        metrics = []
        for i in range(repetitions):
            print('Time %d' % (i + 1))
            predicted = cb.build(max_depth=5, min_doc_num=20)
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        # Each line formats five values taken from mean / std.
        steadyfile.write(line_format % tuple(mean))
        steadyfile.write(line_format % tuple(std))

    # Launch the editor only after the file has been closed.
    os.system('emacs ' + logsteadyfile)
def main():
    """Repeat the community build 50 times and record the metric mean and std.

    The log ``../result/steady_test.log`` receives the result header
    followed by one line for the mean and one for the std of the comparison
    metrics.  When ``../data/steady_test.db`` is missing it is created from
    ``../data/topicgj`` and the keyword extraction passes are run before
    clustering.  After the log has been closed it is opened in emacs.
    """
    import preproc_qqtopic
    import extract_keyword2
    # Unused in this function; retained in case the import has side effects.
    # NOTE(review): confirm whether it can be dropped.
    import worddf

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'
    line_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
    repetitions = 50

    # Use a context manager so the log file is closed on every exit path;
    # an exception in any step used to leave the handle open.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # Database not built yet: load the corpus and extract keywords.
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')
            ke = extract_keyword2.DBKeywordExtractor(dbfile)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        real = cmpcluster.load_doc_labels(dbfile)
        print('steady_test')

        metrics = []
        for i in range(repetitions):
            print('Time %d' % (i + 1))
            predicted = cb.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        # Each line formats five values taken from mean / std.
        steadyfile.write(line_format % tuple(mean))
        steadyfile.write(line_format % tuple(std))

    # Launch the editor only after the file has been closed.
    os.system('emacs ' + logsteadyfile)