Пример #1
0
import SigGen
import ReadUtil



if __name__ == '__main__':
    # Driver script: read the shingling data file, hand the result to
    # SigGen.genMatrix, then pass everything genMatrix returned (plus nHash)
    # to SigGen.minHashSig.
    ret = ReadUtil.readShinglingData('LSH_data_min.txt')
    # Parenthesised single-argument print behaves the same on Python 2 and
    # also parses on Python 3.
    print("now count it")
    m, shgMax, docCnt, shgCnt, totalShgCnt = SigGen.genMatrix(ret)
    # Presumably the number of hash functions to use -- confirm against
    # SigGen.minHashSig.
    nHash = 2
    # sigM and pai are not used further in this script; the call is kept
    # because minHashSig may have side effects that are not visible here.
    sigM, pai = SigGen.minHashSig(m, shgMax, docCnt, shgCnt, totalShgCnt, nHash)
Пример #2
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-  
# by zhangzhi @2013-10-11 23:45:57 
# Copyright 2013 NONE rights reserved.
import LSH
import SigGen

if __name__ == '__main__':
    # Driver script: load the saved data via SigGen.loadSigM, print the
    # loaded sigM, then run LSH.sigMTopN on it and print the result.
    sigMFileName = 'SIG_M.pickle'
    # Load file.
    # NOTE(review): the file name suggests a pickle; if loadSigM unpickles
    # it, only load files from a trusted source.
    g_code2Shg, g_code2Doc, g_shg2Code, g_doc2Code, sigM = SigGen.loadSigM(sigMFileName)
    # Parenthesised single-argument print behaves the same on Python 2 and
    # also parses on Python 3.
    print(sigM)
    # NOTE(review): the meaning of the literals 50, 2 and 100 and of the
    # key 1 in g_doc2Code is not visible here -- confirm against
    # LSH.sigMTopN before changing them.
    ret = LSH.sigMTopN(sigM, 50, 2, g_doc2Code[1], 100)
    print(ret)