# NOTE(review): this physical line looks like the tail of a Python 2 script
# (print statements, xrange) whose newlines and indentation were lost.
# As written, only `item.append(tmp['title'])` executes; everything after the
# first `#3` is comment text. The start of the record-building code (and any
# enclosing loop header) is not visible here, so the original nesting cannot
# be recovered from this line alone -- restore it from the upstream file
# rather than re-indenting by guesswork.
#
# Apparent intent, in order of appearance:
#   * append the title, institution, venue, address, publisher, year, pages,
#     editor, note and month fields of `tmp` to `item` (the trailing #3..#12
#     markers appear to be the positions within `item`, matching the column
#     header printed below), then append `item` to `rlist`;
#   * for j in 1..12, increment s[j] when item[j] is not the empty string,
#     then print the column header and `s`
#     (presumably a per-field count of non-empty values -- TODO confirm);
#   * set dim = len(rlist[0]), build fswoosh(dim, rlist, flist,
#     matchFuncList, mergeFuncList) and call compute() to get `result`;
#   * call evaluate(len(result), len(coraObj), 'clusters.txt',
#     'cora-clusters.txt').do();
#   * print the elapsed time t2 - t1 (t1 is assigned outside this fragment).
item.append(tmp['title']) #3 item.append(tmp['institution']) #4 item.append(tmp['venue']) #5 item.append(tmp['address']) #6 item.append(tmp['publisher']) #7 item.append(tmp['year']) #8 item.append(tmp['pages']) #9 item.append(tmp['editor']) #10 item.append(tmp['note']) #11 item.append(tmp['month']) #12 rlist.append(item) for j in xrange(1, 13): if (item[j] != ''): s[j] += 1 print 'id, auth, vol, ttl, ins, ven, addr, pub, year, pag, edi, nt, mon' print s dim = len(rlist[0]) # f-swoosh: record dimension, record list, feature list, match func list, merge func list fsw = fswoosh(dim, rlist, flist, matchFuncList, mergeFuncList) result = fsw.compute() # evaluation: num after merging, num of records, result file, correct answer file eva = evaluate(len(result), len(coraObj), 'clusters.txt', 'cora-clusters.txt') eva.do() t2 = time.time() print 't2-t1: ' + str(t2-t1)
# NOTE(review): this physical line looks like the tail of a Python 2 script
# (print statements) whose newlines and indentation were lost.
# As written it is not valid Python: several statements run together before
# the first `#0`, and everything after that `#0` is comment text.
# `buc` and `i` are assigned outside this fragment (presumably by enclosing
# loop headers -- TODO confirm), so the original nesting cannot be recovered
# from this line alone; restore it from the upstream file rather than
# re-indenting by guesswork.
#
# Apparent intent, in order of appearance:
#   * look up tmp = coraObj[str(buc[i])] and build `item` as
#     [str(buc[i]), author, volume, title, institution, venue, address,
#     publisher, year, pages, editor, note, month] (positions #0..#12),
#     then append `item` to `rlist`;
#   * set dim = len(rlist[0]), run fswoosh(dim, rlist, flist, matchFuncList,
#     mergeFuncList).compute() and extend `rpool` with its result `res`;
#   * set dim = len(rpool[0]), print len(rpool), replace matchFuncList[2]
#     with levDistPool, and run a second fswoosh over `rpool` to get
#     `result`; print len(result);
#   * call evaluate(len(result), len(coraObj), 'clusters.txt',
#     'cora-clusters.txt').do();
#   * print the elapsed time t2 - t1 (t1 is assigned outside this fragment).
tmp = coraObj[str(buc[i])] item = [] item.append(str(buc[i])) #0 item.append(tmp['author']) #1 item.append(tmp['volume']) #2 item.append(tmp['title']) #3 item.append(tmp['institution']) #4 item.append(tmp['venue']) #5 item.append(tmp['address']) #6 item.append(tmp['publisher']) #7 item.append(tmp['year']) #8 item.append(tmp['pages']) #9 item.append(tmp['editor']) #10 item.append(tmp['note']) #11 item.append(tmp['month']) #12 rlist.append(item) dim = len(rlist[0]) fsw = fswoosh(dim, rlist, flist, matchFuncList, mergeFuncList) res = fsw.compute() rpool += res dim = len(rpool[0]) print len(rpool) matchFuncList[2] = levDistPool fswpool = fswoosh(dim, rpool, flist, matchFuncList, mergeFuncList) result = fswpool.compute() print len(result) eva = evaluate(len(result), len(coraObj), 'clusters.txt', 'cora-clusters.txt') eva.do() t2 = time.time() print 't2-t1: ' + str(t2-t1)